Ejemplo n.º 1
0
def _trim_alignments(run_dir, dna_alignments, retained_threshold,
                     max_indel_length, stats_file, scatterplot_file):
    """Trim all DNA alignments using _trim_alignment (singular), and calculate some statistics about the trimming.

    Every alignment is handed to _trim_alignment as the single tuple
    (trimmed_dir, dna_alignment, max_indel_length). Its result is indexed as
    [0] trimmed file path, [1] original length, [2] trimmed length and
    [3] percentage retained, matching the column header written to stats_file.

    :param run_dir: directory inside which the 'trimmed' directory is created
    :param dna_alignments: sized iterable of DNA alignments to trim
    :param retained_threshold: minimum percentage retained for an alignment to
        be kept; anything below it is reported as filtered
    :param max_indel_length: forwarded to _trim_alignment and echoed in the
        statistics header
    :param stats_file: path of the statistics file; any existing content is
        overwritten
    :param scatterplot_file: forwarded to scatterplot() together with the
        threshold and the trim results
    :return: tuple of (sorted trimmed files that met the threshold,
        sorted trimmed files that did not)
    """
    log.info(
        'Trimming {0} DNA alignments from first non-gap codon to last non-gap codon'
        .format(len(dna_alignments)))

    # Create directory here, to prevent race-condition when folder does not exist, but is then created by another process
    trimmed_dir = create_directory('trimmed', inside_dir=run_dir)

    # Trim all the alignments
    trim_tpls = [
        _trim_alignment((trimmed_dir, dna_alignment, max_indel_length))
        for dna_alignment in dna_alignments
    ]

    remaining_percts = [tpl[3] for tpl in trim_tpls]
    trimmed_alignments = [
        tpl[0] for tpl in trim_tpls if retained_threshold <= tpl[3]
    ]
    misaligned = [tpl[0] for tpl in trim_tpls if retained_threshold > tpl[3]]

    # Without any alignments there is nothing to average; report 0.0 instead
    # of dividing by zero halfway through writing the statistics file.
    if remaining_percts:
        average_retained = sum(remaining_percts) / len(remaining_percts)
    else:
        average_retained = 0.0

    # Write trim statistics to file in such a way that they're easily converted to a graph in Galaxy
    with open(stats_file, mode='w') as stats_handle:
        msg = '{0:6} sequence alignments trimmed'.format(len(trim_tpls))
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        msg = '{0:5.1f}% sequence retained on average overall'.format(
            average_retained)
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        filtered = len(misaligned)
        msg = '{0:6} orthologs filtered because less than {1}% sequence retained or because of indel longer than {2} '\
            .format(filtered, str(retained_threshold), max_indel_length)
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        stats_handle.write(
            '# Trimmed file\tOriginal length\tTrimmed length\tPercentage retained\n'
        )
        # One tab-separated row per alignment, lowest percentage retained first
        for tpl in sorted(trim_tpls, key=itemgetter(3)):
            stats_handle.write('\t'.join((
                os.path.split(tpl[0])[1],
                str(tpl[1]),
                str(tpl[2]),
                '{0:.2f}\n'.format(tpl[3]),
            )))

    # Create scatterplot using trim_tuples
    scatterplot(retained_threshold, trim_tpls, scatterplot_file)

    return sorted(trimmed_alignments), sorted(misaligned)
Ejemplo n.º 2
0
def _one_scatterplot(cl_metadata, xy_data, xy_metadata,
                     output=None,
                     lims=None, specs=None,

                     _spd=sp.ScatterplotData,
                     _anno=co.namedtuple('_annotation',
                                         CellLineMetadata._fields +
                                         ('x', 'y')),
                     _annopxl=co.namedtuple('_annotated_pixel',
                                            'coords annotation')):
    """Draw one scatterplot and return its points as annotated pixels.

    :param cl_metadata: per-point metadata tuples; each is concatenated with
        its (x, y) readout to build an ``_annotation``
    :param xy_data: unpacked with ``*`` into ``zip``, i.e. the x values
        followed by the y values
    :param xy_metadata: passed through to ``sp.scatterplot``
    :param output: passed to ``sp.scatterplot`` as ``outpath``
    :param lims: plot limits; when None and WITHLIMITS is truthy they are
        computed with ``limits`` from the readouts
    :param specs: per-point tuples prepended to (x, y) to build the
        ``ScatterplotData``; defaults to ``get_specs(cl_metadata)``
    :param _spd: private; pre-bound default, not meant to be passed
    :param _anno: private; pre-bound default, not meant to be passed
    :param _annopxl: private; pre-bound default, not meant to be passed
    :return: tuple of ``_annotated_pixel(coords, annotation)`` sorted by
        (coords.y, coords.x, annotation.cell_line_name)
    """
    if specs is None:
        specs = get_specs(cl_metadata)

    # Materialise the (x, y) pairs: they are read by limits() and again when
    # building the annotations, and on Python 3 a bare zip() is a one-shot
    # iterator that would be empty on the second pass.
    readouts = list(zip(*xy_data))
    if lims is None and WITHLIMITS:
        lims = limits(readouts)

    points = tuple(_spd(*(k + (x, y))) for k, x, y in zip(specs, *xy_data))

    fig = sp.scatterplot(points, xy_metadata, lims=lims, outpath=output)
    pixels = sp.pixels(points, fig)

    annotations = tuple(_anno(*(m + r))
                        for m, r in zip(cl_metadata, readouts))

    return tuple(sorted([_annopxl(p, a) for p, a in zip(pixels, annotations)],
                        key=lambda r: (r.coords.y, r.coords.x,
                                       r.annotation.cell_line_name)))
Ejemplo n.º 3
0
def _one_scatterplot(
    cl_metadata,
    xy_data,
    xy_metadata,
    output=None,
    lims=None,
    specs=None,
    _spd=sp.ScatterplotData,
    _anno=co.namedtuple('_annotation', CellLineMetadata._fields + ('x', 'y')),
    _annopxl=co.namedtuple('_annotated_pixel', 'coords annotation')):
    """Draw one scatterplot and return its points as annotated pixels.

    :param cl_metadata: per-point metadata tuples; each is concatenated with
        its (x, y) readout to build an ``_annotation``
    :param xy_data: unpacked with ``*`` into ``zip``, i.e. the x values
        followed by the y values
    :param xy_metadata: passed through to ``sp.scatterplot``
    :param output: passed to ``sp.scatterplot`` as ``outpath``
    :param lims: plot limits; when None and WITHLIMITS is truthy they are
        computed with ``limits`` from the readouts
    :param specs: per-point tuples prepended to (x, y) to build the
        ``ScatterplotData``; defaults to ``get_specs(cl_metadata)``
    :param _spd: private; pre-bound default, not meant to be passed
    :param _anno: private; pre-bound default, not meant to be passed
    :param _annopxl: private; pre-bound default, not meant to be passed
    :return: tuple of ``_annotated_pixel(coords, annotation)`` sorted by
        (coords.y, coords.x, annotation.cell_line_name)
    """
    if specs is None:
        specs = get_specs(cl_metadata)

    # Materialise the (x, y) pairs: they are read by limits() and again when
    # building the annotations, and on Python 3 a bare zip() is a one-shot
    # iterator that would be empty on the second pass.
    readouts = list(zip(*xy_data))
    if lims is None and WITHLIMITS:
        lims = limits(readouts)

    points = tuple(_spd(*(k + (x, y))) for k, x, y in zip(specs, *xy_data))

    fig = sp.scatterplot(points, xy_metadata, lims=lims, outpath=output)
    pixels = sp.pixels(points, fig)

    annotations = tuple(_anno(*(m + r)) for m, r in zip(cl_metadata, readouts))

    return tuple(
        sorted([_annopxl(p, a) for p, a in zip(pixels, annotations)],
               key=lambda r:
               (r.coords.y, r.coords.x, r.annotation.cell_line_name)))
Ejemplo n.º 4
0
def _trim_alignments(run_dir, dna_alignments, retained_threshold, max_indel_length, stats_file, scatterplot_file):
    """Trim all DNA alignments using _trim_alignment (singular), and calculate some statistics about the trimming.

    Every alignment is handed to _trim_alignment as the single tuple
    (trimmed_dir, dna_alignment, max_indel_length). Its result is indexed as
    [0] trimmed file path, [1] original length, [2] trimmed length and
    [3] percentage retained, matching the column header written to stats_file.

    :param run_dir: directory inside which the 'trimmed' directory is created
    :param dna_alignments: sized iterable of DNA alignments to trim
    :param retained_threshold: minimum percentage retained for an alignment to be kept; anything below it is
        reported as filtered
    :param max_indel_length: forwarded to _trim_alignment and echoed in the statistics header
    :param stats_file: path of the statistics file; any existing content is overwritten
    :param scatterplot_file: forwarded to scatterplot() together with the threshold and the trim results
    :return: tuple of (sorted trimmed files that met the threshold, sorted trimmed files that did not)
    """
    log.info('Trimming {0} DNA alignments from first non-gap codon to last non-gap codon'.format(len(dna_alignments)))

    # Create directory here, to prevent race-condition when folder does not exist, but is then created by another process
    trimmed_dir = create_directory('trimmed', inside_dir=run_dir)

    # Trim all the alignments
    trim_tpls = [_trim_alignment((trimmed_dir, dna_alignment, max_indel_length)) for dna_alignment in dna_alignments]

    remaining_percts = [tpl[3] for tpl in trim_tpls]
    trimmed_alignments = [tpl[0] for tpl in trim_tpls if retained_threshold <= tpl[3]]
    misaligned = [tpl[0] for tpl in trim_tpls if retained_threshold > tpl[3]]

    # Without any alignments there is nothing to average; report 0.0 instead of dividing by zero halfway through
    # writing the statistics file.
    average_retained = sum(remaining_percts) / len(remaining_percts) if remaining_percts else 0.0

    # Write trim statistics to file in such a way that they're easily converted to a graph in Galaxy
    with open(stats_file, mode='w') as stats_handle:
        msg = '{0:6} sequence alignments trimmed'.format(len(trim_tpls))
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        msg = '{0:5.1f}% sequence retained on average overall'.format(average_retained)
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        filtered = len(misaligned)
        msg = '{0:6} orthologs filtered because less than {1}% sequence retained or because of indel longer than {2} '\
            .format(filtered, str(retained_threshold), max_indel_length)
        log.info(msg)
        stats_handle.write('#' + msg + '\n')

        stats_handle.write('# Trimmed file\tOriginal length\tTrimmed length\tPercentage retained\n')
        # One tab-separated row per alignment, lowest percentage retained first
        for tpl in sorted(trim_tpls, key=itemgetter(3)):
            row = (os.path.split(tpl[0])[1], str(tpl[1]), str(tpl[2]), '{0:.2f}\n'.format(tpl[3]))
            stats_handle.write('\t'.join(row))

    # Create scatterplot using trim_tuples
    scatterplot(retained_threshold, trim_tpls, scatterplot_file)

    return sorted(trimmed_alignments), sorted(misaligned)
Ejemplo n.º 5
0
def handle_input(database_filename, conn, cur):
    """Show the action menu, read one choice and run the matching action.

    Choices "1", "2", "3" and "6" call their action with
    (database_filename, conn, cur); "4" and "5" call theirs without
    arguments. Any other text is ignored.

    Returns False when the user picks "7" (quit) and True otherwise.
    """
    menu = """
    Please enter a number (1-7) to perform an action:
    Please perform actions 1, 2, and 3 in order before anything else.

        API
            (1) Collect 20 headlines and abstracts from New York Times API
            (2) Collect 20 sets of text sentiment values for headlines and abstracts from Parallel Dots API

        CALCULATE
            (3) Calculate wellbeing scores for headlines/abstracts

        VISUALIZE
            (4) Compose scatter plot
            (5) Compose histogram chart
            (6) Compose pie chart
        
        OTHER
            (7) Quit

    User input: """
    choice = input(menu)

    # Quit is the only choice that stops the caller
    if choice == "7":
        return False

    # Lambdas defer the name lookup until the chosen action actually runs
    actions = {
        "1": lambda: get_headlines_and_abstracts(database_filename, conn, cur),
        "2": lambda: call_sentiment_api(database_filename, conn, cur),
        "3": lambda: calculate_wellbeing_scores(database_filename, conn, cur),
        "4": lambda: scatterplot(),
        "5": lambda: compose_bar_chart(),
        "6": lambda: piechart(database_filename, conn, cur),
    }
    selected = actions.get(choice)
    if selected is not None:
        selected()
    return True
Ejemplo n.º 6
0
def write_scatterplot(output, points, axis_labels, lims=None):
    """Render a scatterplot with sp.scatterplot and write it to a file.

    :param output: path of the file to write; opened in text mode 'w', so
        existing content is replaced
    :param points: passed through to sp.scatterplot
    :param axis_labels: passed through to sp.scatterplot
    :param lims: passed through to sp.scatterplot (third positional argument)
    """
    # Render first: opening with 'w' truncates, so doing it before a failed
    # render would wipe an existing file or leave an empty one behind.
    fig = sp.scatterplot(points, axis_labels, lims)
    with open(output, 'w') as outfh:
        outfh.write(fig)
Ejemplo n.º 7
0
def write_scatterplot(output, points, axis_labels, lims=None):
    """Render a scatterplot with sp.scatterplot and write it to a file.

    :param output: path of the file to write; opened in text mode 'w', so
        existing content is replaced
    :param points: passed through to sp.scatterplot
    :param axis_labels: passed through to sp.scatterplot
    :param lims: passed through to sp.scatterplot (third positional argument)
    """
    # Render first: opening with 'w' truncates, so doing it before a failed
    # render would wipe an existing file or leave an empty one behind.
    fig = sp.scatterplot(points, axis_labels, lims)
    with open(output, 'w') as outfh:
        outfh.write(fig)
Ejemplo n.º 8
0
 k=int(input("Please enter the number: "))
 while k!=1 and k!=2 and k!=3:
     k=int(input("Wrong input! Please enter a number from 0 to 3: "))       
 if k == 1:
     print("Display the information of Desciptive Analysis and Frequency in Comics: ")
     fa.first_avengers()
     #print(fa.first_avengers().df.describe())
     print("Go back to MENU.")
     Menu()
     i=int(input("Please enter the number: "))
     while i!=0 and i!=1 and i!=2 and i!=3:
         i=int(input("Wrong input! Please enter a number from 0 to 3: "))
 elif k == 2:
     print("Display the information of Frequency in Comics, Relationship of Power and Frequency, and Power Distribution")
     bc.barchart()
     sp.scatterplot()
     cs.chart_skill()
     print("Go back to MENU.")
     Menu()
     i=int(input("Please enter the number: "))
     while i!=0 and i!=1 and i!=2 and i!=3:
         i=int(input("Wrong input! Please enter a number from 0 to 3: "))
 elif k == 3:
     print("Display the information of Popularity, Meta Score VS Audience Rating, Comment Visualization")
     pfp.Plot_for_Popularity()            
     print("Wordcloud image is generating...Please wait patiently.")
     wc.word_cloud()
     print("Go back to MENU.")
     Menu()
     i=int(input("Please enter the number: "))
     while i!=0 and i!=1 and i!=2 and i!=3: