Code example #1
Score: 0
File: cmds.py  Project: bwlang/fatools
def do_dendogram( args, fsa_list, dbh ):
    """Show cluster dendrograms of the ladder sizes next to the detected ladder peaks.

    For each FSA, peaks are scanned on the ladder channel, two hierarchical
    cluster trees are built (peak rtimes vs. expected ladder sizes), the flat
    cluster assignments are printed, and both dendrograms are plotted side by side.
    """
    # NOTE(review): function name is misspelled ("dendrogram") but kept for caller compatibility

    from fatools.lib.fautil import hclustalign
    from matplotlib import pyplot as plt

    for fsa, sample_code in fsa_list:

        channel = fsa.get_ladder_channel()
        # ensure peaks have been detected before clustering
        channel.scan(params.Params())

        ladder = fsa.panel.get_ladder()
        alleles = channel.get_alleles()

        # build hierarchical clustering trees over (value, 0) points
        peak_tree = hclustalign.generate_tree([(allele.rtime, 0) for allele in alleles])
        size_tree = hclustalign.generate_tree([(size, 0) for size in ladder['sizes']])

        n_clusters = args.cluster or ladder['k']
        print(hclustalign.fcluster(size_tree.z, n_clusters, criterion="maxclust"))
        print(hclustalign.fcluster(peak_tree.z, n_clusters, criterion="maxclust"))

        plt.figure()
        plt.subplot(121)
        hclustalign.dendrogram(size_tree.z, leaf_rotation=90, leaf_font_size=8,
                               labels=[leaf[0] for leaf in size_tree.p])
        plt.subplot(122)
        hclustalign.dendrogram(peak_tree.z, leaf_rotation=90, leaf_font_size=8,
                               labels=[leaf[0] for leaf in peak_tree.p])
        plt.show()
Code example #2
Score: 0
File: facmd.py  Project: nebiolabs/fatools
def do_scan( args, dbh ):
    """Scan every selected assay for peaks, reporting per-assay progress to stderr."""

    cerr('I: Scanning peaks...')

    parameter = params.Params()
    assays = get_assay_list( args, dbh )

    # leveldb-backed peak cache is currently disabled; original code kept for reference:
    # if args.peakcachedb:
    #     import leveldb
    #     peakdb = leveldb.LevelDB(args.peakcachedb, create_if_missing=False)
    # else:
    #     peakdb = None
    peakdb = None

    # an explicit --method overrides both ladder and non-ladder scanning methods
    if args.method:
        parameter.ladder.method = args.method
        parameter.nonladder.method = args.method

    total = len(assays)
    for idx, (assay, sample_code) in enumerate(assays, start=1):
        cerr('I: [%d/%d] - Scanning: %s | %s' %
                (idx, total, sample_code, assay.filename))
        assay.scan( parameter, peakdb = peakdb )
Code example #3
Score: 0
File: cmds.py  Project: bwlang/fatools
def do_align( args, fsa_list, dbh ):
    """Align the size standard of every FSA in *fsa_list* using default parameters."""

    cerr('I: Aligning size standards...')

    for fsa, _sample_code in fsa_list:
        cverr(3, 'D: aligning FSA %s' % fsa.filename)
        # a fresh Params() per FSA, matching the original behavior
        fsa.align(params.Params())
Code example #4
Score: 0
File: cmds.py  Project: mkdryden/fatools
def do_call(args, fsa_list, dbh):
    """Call non-ladder peaks on each FSA, restricted to the markers named in args.marker."""

    cerr('I: Calling non-ladder peaks...')

    for fsa, _sample_code in fsa_list:
        cverr(3, 'D: calling FSA %s' % fsa.filename)
        fsa.call(params.Params(), args.marker)
Code example #5
Score: 0
File: facmd.py  Project: nebiolabs/fatools
def do_call(args, dbh):
    """Run peak calling on every selected assay with default parameters."""

    cerr('I: Calling peaks...')

    parameter = params.Params()

    assays = get_assay_list( args, dbh )
    total = len(assays)
    for idx, (assay, sample_code) in enumerate(assays, start=1):
        cerr('I: [%d/%d] - Calling: %s | %s' %
                (idx, total, sample_code, assay.filename))
        assay.call( parameter )
Code example #6
Score: 0
File: plot.py  Project: mkdryden/fatools
def align_fsa(fsa):
    """Align *fsa* in place so allele size and retention time can be extracted.

    Parameters
    ----------
    fsa : FSA object to align; default ``params.Params()`` are used.

    Returns
    -------
    None -- the fsa object itself is mutated by the alignment.
    """
    fsa.align(params.Params())
Code example #7
Score: 0
File: analyze.py  Project: mkdryden/fatools
def do_binsummary(args, dbh):
    """Iteratively summarize bins, adjust marker bins, and rebin all samples.

    Runs ``args.iteration`` rounds of: summarize bins over the filtered
    analytical sets, apply the updated bins to each reported marker, then
    rebin every sample's assays.  If ``args.outfile`` is given, the final
    bins per marker are dumped to it as YAML.
    """

    from fatools.lib.analytics.summary import summarize_bins

    scanning_parameter = params.Params()

    markers = None
    for i in range(args.iteration):
        query = get_query(args, dbh)
        analytical_sets = query.get_filtered_analytical_sets()
        report = summarize_bins(analytical_sets)
        cerr('I: Bin summary iteration %d' % i)
        pprint(report)

        # apply the updated bins to each marker mentioned in the report
        markers = []
        for (marker_id, updated_bins) in report.items():
            marker = dbh.get_marker_by_id(marker_id)
            marker.adjustbins(updated_bins)
            markers.append(marker)
        dbh.session().flush()

        # rebinning every sample against the freshly adjusted markers
        cerr('I: Rebinning samples')
        N = len(analytical_sets.sample_ids)
        count = 1
        for sample_id in analytical_sets.sample_ids:
            sample = dbh.get_sample_by_id(sample_id)
            cerr('\rI: [%d/%d] - Binning sample...' % (count, N), nl=False)
            for assay in sample.assays:
                assay.bin(scanning_parameter.nonladder, markers)
            count += 1
        cerr('')
        dbh.session().flush()

    if args.outfile:

        # BUGFIX: previously crashed with TypeError when args.iteration == 0
        # (markers was still None); now report and skip the dump instead.
        if markers is None:
            cerr('W: no iterations were run; no bins to write')
            return

        output_dict = {}
        for marker in markers:
            output_dict[marker.label] = {
                'label': marker.label,
                'bins': marker.bins
            }

        with open(args.outfile, 'wt') as f:
            yaml.dump(output_dict, f)
        cerr('I: writing bins to %s' % args.outfile)
Code example #8
Score: 0
File: facmd.py  Project: mkdryden/fatools
def do_bin(args, dbh):
    """Bin peaks for every selected assay, optionally restricted to named markers."""

    cerr('I: Binning peaks...')

    parameter = params.Params()

    # restrict binning to explicitly requested markers, if any were given
    markers = ([dbh.get_marker(code) for code in args.marker.split(',')]
               if args.marker else None)

    assays = get_assay_list(args, dbh)
    total = len(assays)
    for idx, (assay, sample_code) in enumerate(assays, start=1):
        cerr('I: [%d/%d] - Binning: %s | %s' %
             (idx, total, sample_code, assay.filename))
        assay.bin(parameter, markers)
Code example #9
Score: 0
File: facmd.py  Project: nebiolabs/fatools
def do_findpeaks( args, dbh ):
    """Collect (tag, data, params) triples for every channel and find peaks in parallel.

    Requires ``args.peakcachedb`` to be provided; the leveldb-backed cache
    itself is currently disabled, so ``peakdb`` is always None.
    """

    #import leveldb
    from fatools.lib import params

    cerr('Finding and caching peaks...')

    if not args.peakcachedb:
        cexit('ERR - please provide cache db filename')

    """
    # opening LevelDB database
    if args.peakcachedb == '-':
        peakdb = None
    else:
        peakdb = leveldb.LevelDB(args.peakcachedb)
    """
    peakdb = None

    scanning_parameter = params.Params()
    assay_list = get_assay_list( args, dbh )

    # an explicit --method overrides both ladder and non-ladder scanning methods
    if args.method:
        scanning_parameter.ladder.method = args.method
        scanning_parameter.nonladder.method = args.method

    channel_list = []
    counter = 1
    cerr('', nl=False)
    for (assay, sample_code) in assay_list:
        cerr('\rI: [%d/%d] processing assay' % (counter, len(assay_list)), nl=False)
        for c in assay.channels:
            # BUGFIX: use a dedicated name instead of rebinding `params`, which
            # previously shadowed the `params` module imported above
            if c.marker.code == 'ladder':
                channel_params = scanning_parameter.ladder
            else:
                channel_params = scanning_parameter.nonladder
            channel_list.append( (c.tag(), c.data, channel_params) )
        counter += 1
    cerr('')

    do_parallel_find_peaks( channel_list, peakdb )
Code example #10
Score: 0
def do_analyze(args):
    """Open a trace file and perform fragment analysis (scan & call only)."""

    from fatools.lib.fautil.traceio import read_abif_stream
    from fatools.lib.fautil.traceutils import separate_channels
    from fatools.lib.fsmodels.models import Assay, Marker, Panel
    from fatools.lib import params

    scanning_parameter = params.Params()

    # dummy ladder marker
    # NOTE(review): `ladder` is never referenced afterwards; kept in case the
    # Marker constructor has registration side effects -- confirm before removing
    ladder = Marker('ladder', 10, 600, 0, None)

    # dummy panel carrying only the requested size standard, no markers
    dummy_panel = Panel( '-', {
        'ladder': args.sizestandard,
        'markers': {},
    })

    with open(args.file, 'rb') as in_stream:
        cerr('Reading FSA file: %s' % args.file)
        trace = read_abif_stream(in_stream)

    # assemble an Assay around the raw trace
    assay = Assay()
    assay.size_standard = args.sizestandard
    assay._trace = trace

    # create and assign channels against the dummy panel
    assay.create_channels()
    assay.assign_channels( panel = dummy_panel )

    # scan for peaks (no allele calling is done here)
    assay.scan(scanning_parameter)
Code example #11
Score: 0
File: facmd.py  Project: mkdryden/fatools
def do_postannotate(args, dbh):
    """Post-annotate peaks for every selected assay, honoring stutter overrides from args."""

    cerr('I: Post-annotating peaks...')

    parameter = params.Params()

    # restrict post-annotation to explicitly requested markers, if any
    markers = ([dbh.get_marker(code) for code in args.marker.split(',')]
               if args.marker else None)

    # only positive values override the parameter defaults
    if args.stutter_ratio > 0:
        parameter.nonladder.stutter_ratio = args.stutter_ratio
    if args.stutter_range > 0:
        parameter.nonladder.stutter_range = args.stutter_range

    assays = get_assay_list(args, dbh)
    total = len(assays)
    for idx, (assay, sample_code) in enumerate(assays, start=1):
        cerr('I: [%d/%d] - Post-annotating: %s | %s' %
             (idx, total, sample_code, assay.filename))
        assay.postannotate(parameter, markers)
Code example #12
Score: 0
File: test.py  Project: nebiolabs/fatools
def main():
    """Ad-hoc test driver: load a hard-coded FSA file and, for --type=allelemethods,
    plot the ladder fit produced by each allele-calling method.
    """

    p = argparse.ArgumentParser('test_fatools')
    p.add_argument('--type', default='', help = "type of test")

    args = p.parse_args()

    print("args: ", args)

    # hard-coded input: directory "116" holding a single FSA file
    trace_dir = "116"
    file_list = "05-M13ii-polD-5min.fsa"

    files = {} # dictionary containing directories and names of files within directories
    files['2'] = trace_dir

    # get FSA
    from fatools.lib.fileio.models import Marker, Panel, FSA

    # register the default panels/markers before looking one up
    Panel.upload(params.default_panels)
    Marker.upload(params.default_markers)

    panel = Panel.get_panel("GS120LIZ")
    fsa_list = []
    index = 1

    # set parameters for baseline correction
    from fatools.lib.const import allelemethod, baselinemethod

    _params = params.Params()
    _params.baselinewindow = 51
    _params.baselinemethod = baselinemethod.minimum
    _params.ladder.min_rfu = 500
    _params.ladder.min_rfu_ratio = 0.2

    # load each comma-separated file into an (fsa, index-string) pair
    for fsa_filename in file_list.split(','):

        fsa_filename = fsa_filename.strip()
        filename = trace_dir + "/" + fsa_filename

        fsa = FSA.from_file(filename, panel, _params, cache = False)
        fsa_list.append( (fsa, str(index)) )
        index += 1

    if args.type == 'allelemethods':

        import matplotlib.pyplot as plt
        import numpy as  np

        fig = plt.figure()

        # only the first FSA is examined
        (fsa, fsa_index) = fsa_list[0]

        print('D: aligning FSA %s' % fsa.filename)
        try:
            fsa.align(_params)
        except LadderMismatchException:
            print(("LadderMismatch: %s\n") % fsa.filename)

        c = fsa.get_ladder_channel()

        # get ladder and times for peaks fit to ladder
        # NOTE(review): plotting rtimes against ladder sizes assumes the two
        # sequences have equal length -- confirm align() guarantees this
        ladder_sizes = fsa.panel.get_ladder()['sizes']
        alleles = c.get_alleles()
        allele_sizes = [allele.rtime for allele in alleles]

        plt.plot(allele_sizes, ladder_sizes, 'p',
                 label='peaks matched to ladder steps')

        # overlay the fitted size curve of each supported allele method
        for method in [ allelemethod.leastsquare, allelemethod.cubicspline, allelemethod.localsouthern ]:
        #for method in [ allelemethod.localsouthern ]:

            print("\nmethod: ", method)

            _params.allelemethod = method

            # call align again just to set the allelemethod
            print('D: aligning FSA %s' % fsa.filename)
            try:
                fsa.align(_params)
            except LadderMismatchException:
                print(("LadderMismatch: %s\n") % fsa.filename)

            func = fsa.allele_fit_func 

            # plot fit of ladder scan times to base pairs
            fit = np.poly1d(c.fsa.z)
            #x = np.arange(allele_sizes[0] - 150, allele_sizes[-1] + 100)  # len(c.data))
            x = np.arange(800, allele_sizes[-1] + 100)  # len(c.data))
            vecfunc = np.vectorize(func)
            print("vecfunc([1,2,3])=", vecfunc([1,2,3]))
            y_all  = vecfunc(x)
            # presumably vecfunc returns a (size, ...) tuple; only element 0 is plotted -- verify
            plt.plot(x, vecfunc(x)[0], label=method)

        plt.legend()
        plt.xlabel("peak scan times")
        plt.ylabel("# base pairs")

        plt.show()
Code example #13
Score: 0
def main(args):
    """Entry point: build Params from CLI arguments, load FSA data, and dispatch.

    Opens a database handle only when no file/infile/indir input is supplied.
    Without --commit, any database changes are discarded; the user is prompted
    for confirmation unless --test or -y was given.
    """

    if args.verbose != 0:
        set_verbosity(args.verbose)

    dbh = None

    # set parameter for baseline correction and allelemethod
    from fatools.lib.const import allelemethod, baselinemethod
    _params = params.Params()

    _params.baselinewindow = args.baselinewindow

    # map CLI strings onto enum members; an empty string keeps the default
    if args.baselinemethod != "":
        baseline_map = {
            'none': baselinemethod.none,
            'median': baselinemethod.median,
            'minimum': baselinemethod.minimum,
        }
        if args.baselinemethod not in baseline_map:
            raise NotImplementedError()
        _params.baselinemethod = baseline_map[args.baselinemethod]

    if args.allelemethod != "":
        allele_map = {
            'leastsquare': allelemethod.leastsquare,
            'cubicspline': allelemethod.cubicspline,
            'localsouthern': allelemethod.localsouthern,
        }
        if args.allelemethod not in allele_map:
            raise NotImplementedError()
        _params.allelemethod = allele_map[args.allelemethod]

    if args.nonladder_smoothing_window > 0:
        _params.nonladder.smoothing_window = args.nonladder_smoothing_window
        _params.nonladder.smoothing_order = args.nonladder_smoothing_order

    cerr('I: Aligning size standards...')
    if args.file or args.infile or args.indir:
        cverr(4, 'D: opening FSA file(s)')
        fsa_list = open_fsa(args, _params)
    elif dbh is None:
        cverr(4, 'D: connecting to database')
        dbh = get_dbhandler(args)
        fsa_list = get_fsa_list(args, dbh)

    cerr('I: obtained %d FSA' % len(fsa_list))

    if args.commit:
        with transaction.manager:
            # BUGFIX: previously called do_facmd(args, fsa_list, dbh), which is
            # inconsistent with the non-commit paths below and dropped _params
            do_facmds(args, fsa_list, _params, dbh)
            cerr('** COMMIT to database **')
    elif dbh:
        cerr(
            'WARNING ** running without database COMMIT! All changes will be discarded!'
        )
        if not (args.test or args.y):
            keys = input('Do you want to continue [y/n]? ')
            if not keys.lower().strip().startswith('y'):
                sys.exit(1)
        do_facmds(args, fsa_list, _params, dbh)
    else:
        do_facmds(args, fsa_list, _params)