def do_dendogram( args, fsa_list, dbh ):
    """Show side-by-side dendrograms of the ladder step sizes and the
    peaks detected on each FSA's ladder channel.

    NOTE(review): the name looks like a typo for "dendrogram" but is kept
    as-is so existing callers keep working.
    """
    from fatools.lib.fautil import hclustalign
    from matplotlib import pyplot as plt

    for fsa, _sample_code in fsa_list:
        channel = fsa.get_ladder_channel()
        channel.scan(params.Params())  # scan first if necessary
        ladder = fsa.panel.get_ladder()
        peaks = channel.get_alleles()

        # build one hierarchical-clustering tree from peak retention times
        # and one from the expected ladder sizes
        peak_tree = hclustalign.generate_tree([(p.rtime, 0) for p in peaks])
        size_tree = hclustalign.generate_tree([(s, 0) for s in ladder['sizes']])

        n_clusters = args.cluster or ladder['k']
        for tree in (size_tree, peak_tree):
            print(hclustalign.fcluster(tree.z, n_clusters, criterion="maxclust"))

        plt.figure()
        plt.subplot(121)
        hclustalign.dendrogram(size_tree.z, leaf_rotation=90, leaf_font_size=8,
                               labels=[x[0] for x in size_tree.p])
        plt.subplot(122)
        hclustalign.dendrogram(peak_tree.z, leaf_rotation=90, leaf_font_size=8,
                               labels=[x[0] for x in peak_tree.p])
        plt.show()
def do_scan( args, dbh ):
    """Scan every assay selected by *args* for peaks."""
    cerr('I: Scanning peaks...')

    scanning_parameter = params.Params()
    assay_list = get_assay_list( args, dbh )

    # leveldb-backed peak caching (args.peakcachedb) is currently disabled
    peakdb = None

    if args.method:
        scanning_parameter.ladder.method = args.method
        scanning_parameter.nonladder.method = args.method

    total = len(assay_list)
    for counter, (assay, sample_code) in enumerate(assay_list, start=1):
        cerr('I: [%d/%d] - Scanning: %s | %s'
             % (counter, total, sample_code, assay.filename))
        assay.scan( scanning_parameter, peakdb = peakdb )
def do_align( args, fsa_list, dbh ):
    """Align the size-standard (ladder) channel of every FSA in *fsa_list*."""
    cerr('I: Aligning size standards...')
    for fsa, _sample_code in fsa_list:
        cverr(3, 'D: aligning FSA %s' % fsa.filename)
        fsa.align(params.Params())
def do_call(args, fsa_list, dbh):
    """Call non-ladder peaks on each FSA for the marker named in *args*."""
    cerr('I: Calling non-ladder peaks...')
    for fsa, _sample_code in fsa_list:
        cverr(3, 'D: calling FSA %s' % fsa.filename)
        fsa.call(params.Params(), args.marker)
def do_call(args, dbh):
    """Call peaks on every assay selected by *args*.

    NOTE(review): this redefines ``do_call`` with a different signature;
    if both definitions live in the same module, this one shadows the
    earlier FSA-list variant — confirm they belong to separate modules.
    """
    cerr('I: Calling peaks...')

    scanning_parameter = params.Params()
    assay_list = get_assay_list( args, dbh )

    total = len(assay_list)
    for counter, (assay, sample_code) in enumerate(assay_list, start=1):
        cerr('I: [%d/%d] - Calling: %s | %s'
             % (counter, total, sample_code, assay.filename))
        assay.call( scanning_parameter )
def align_fsa(fsa):
    """Align *fsa* using default parameters, preparing it for size and
    retention-time extraction from each allele.

    Parameters
    ----------
    fsa : an FSA instance; its ``align`` method is driven by a fresh
        ``params.Params()`` from ``fatools.lib.params``.

    Returns
    -------
    None — *fsa* is aligned in place.
    """
    fsa.align(params.Params())
def do_binsummary(args, dbh):
    """Iteratively summarize allele bins, push adjusted bins back to the
    markers, and rebin every sample in the analytical sets.

    Runs ``args.iteration`` passes; after the final pass, optionally dumps
    the adjusted bins of every touched marker to the YAML file named by
    ``args.outfile``.

    Fix: removed an unused local (``assay_list = []``) that was assigned
    but never read.
    """
    from fatools.lib.analytics.summary import summarize_bins

    scanning_parameter = params.Params()
    markers = None  # remains None if args.iteration == 0

    for i in range(args.iteration):
        query = get_query(args, dbh)
        analytical_sets = query.get_filtered_analytical_sets()
        report = summarize_bins(analytical_sets)
        cerr('I: Bin summary iteration %d' % i)
        pprint(report)

        # apply the updated bins to each marker mentioned in the report
        markers = []
        for (marker_id, updated_bins) in report.items():
            marker = dbh.get_marker_by_id(marker_id)
            marker.adjustbins(updated_bins)
            markers.append(marker)
        dbh.session().flush()

        # rebinning every sample against the freshly adjusted markers
        cerr('I: Rebinning samples')
        N = len(analytical_sets.sample_ids)
        count = 1
        for sample_id in analytical_sets.sample_ids:
            sample = dbh.get_sample_by_id(sample_id)
            cerr('\rI: [%d/%d] - Binning sample...' % (count, N), nl=False)
            for assay in sample.assays:
                assay.bin(scanning_parameter.nonladder, markers)
            count += 1
        cerr('')
        dbh.session().flush()

    if args.outfile:
        output_dict = {}
        for marker in markers:
            output_dict[marker.label] = {
                'label': marker.label,
                'bins': marker.bins,
            }
        with open(args.outfile, 'wt') as f:
            yaml.dump(output_dict, f)
        cerr('I: writing bins to %s' % args.outfile)
def do_bin(args, dbh):
    """Bin called peaks of the selected assays against marker bins."""
    cerr('I: Binning peaks...')

    scanning_parameter = params.Params()

    # comma-separated marker codes in args.marker select specific markers;
    # otherwise bin against all (markers=None)
    markers = ([dbh.get_marker(code) for code in args.marker.split(',')]
               if args.marker else None)

    assay_list = get_assay_list(args, dbh)
    total = len(assay_list)
    for counter, (assay, sample_code) in enumerate(assay_list, start=1):
        cerr('I: [%d/%d] - Binning: %s | %s'
             % (counter, total, sample_code, assay.filename))
        assay.bin(scanning_parameter, markers)
def do_findpeaks( args, dbh ):
    """Collect (tag, data, params) tuples for every channel of the selected
    assays and run parallel peak finding over them.

    Fix: the channel loop previously rebound the name ``params`` — shadowing
    the imported ``fatools.lib.params`` module — with the per-channel
    parameter object; the local is now ``channel_params``.
    """
    from fatools.lib import params

    cerr('Finding and caching peaks...')

    if not args.peakcachedb:
        cexit('ERR - please provide cache db filename')

    # leveldb-backed peak cache is currently disabled
    peakdb = None

    scanning_parameter = params.Params()
    assay_list = get_assay_list( args, dbh )

    if args.method:
        scanning_parameter.ladder.method = args.method
        scanning_parameter.nonladder.method = args.method

    channel_list = []
    counter = 1
    cerr('', nl=False)
    for (assay, sample_code) in assay_list:
        cerr('\rI: [%d/%d] processing assay' % (counter, len(assay_list)),
             nl=False)
        for c in assay.channels:
            # ladder channels get ladder scanning parameters, all others
            # the non-ladder parameters
            if c.marker.code == 'ladder':
                channel_params = scanning_parameter.ladder
            else:
                channel_params = scanning_parameter.nonladder
            channel_list.append( (c.tag(), c.data, channel_params) )
        counter += 1
    cerr('')

    do_parallel_find_peaks( channel_list, peakdb )
def do_analyze(args):
    """Open a tracefile and perform fragment analysis (scan & call only)."""
    from fatools.lib.fautil.traceio import read_abif_stream
    from fatools.lib.fautil.traceutils import separate_channels
    from fatools.lib.fsmodels.models import Assay, Marker, Panel
    from fatools.lib import params

    scanning_parameter = params.Params()

    # create dummy markers
    # NOTE(review): the returned object is never read; kept in case the
    # Marker constructor registers itself somewhere — confirm
    ladder = Marker('ladder', 10, 600, 0, None)

    # create dummy panel
    dummy_panel = Panel('-', {
        'ladder': args.sizestandard,
        'markers': {},
    })

    with open(args.file, 'rb') as in_stream:
        cerr('Reading FSA file: %s' % args.file)
        trace = read_abif_stream(in_stream)

    # build a new Assay around the trace
    assay = Assay()
    assay.size_standard = args.sizestandard
    assay._trace = trace

    # create all channels, then assign them against the dummy panel
    assay.create_channels()
    assay.assign_channels(panel=dummy_panel)

    # scan for peaks
    assay.scan(scanning_parameter)
def do_postannotate(args, dbh):
    """Post-annotate called peaks (stutter handling) for the selected assays."""
    cerr('I: Post-annotating peaks...')

    scanning_parameter = params.Params()

    markers = ([dbh.get_marker(code) for code in args.marker.split(',')]
               if args.marker else None)

    # optional stutter overrides from the command line
    if args.stutter_ratio > 0:
        scanning_parameter.nonladder.stutter_ratio = args.stutter_ratio
    if args.stutter_range > 0:
        scanning_parameter.nonladder.stutter_range = args.stutter_range

    assay_list = get_assay_list(args, dbh)
    total = len(assay_list)
    for counter, (assay, sample_code) in enumerate(assay_list, start=1):
        cerr('I: [%d/%d] - Post-annotating: %s | %s'
             % (counter, total, sample_code, assay.filename))
        assay.postannotate(scanning_parameter, markers)
def main():
    """Ad-hoc test driver: load one FSA and, for --type=allelemethods,
    plot the ladder fit of each allele-calling method.

    Fix: removed dead locals that were assigned but never read
    (the ``files`` dict, ``fit = np.poly1d(...)``, ``y_all``, and the
    unused ``fig`` binding).
    """
    p = argparse.ArgumentParser('test_fatools')
    p.add_argument('--type', default='', help="type of test")
    args = p.parse_args()
    print("args: ", args)

    trace_dir = "116"
    file_list = "05-M13ii-polD-5min.fsa"

    # get FSA
    from fatools.lib.fileio.models import Marker, Panel, FSA
    Panel.upload(params.default_panels)
    Marker.upload(params.default_markers)
    panel = Panel.get_panel("GS120LIZ")
    fsa_list = []
    index = 1

    # set parameters for baseline correction
    from fatools.lib.const import allelemethod, baselinemethod
    _params = params.Params()
    _params.baselinewindow = 51
    _params.baselinemethod = baselinemethod.minimum
    _params.ladder.min_rfu = 500
    _params.ladder.min_rfu_ratio = 0.2

    for fsa_filename in file_list.split(','):
        fsa_filename = fsa_filename.strip()
        filename = trace_dir + "/" + fsa_filename
        fsa = FSA.from_file(filename, panel, _params, cache=False)
        fsa_list.append((fsa, str(index)))
        index += 1

    if args.type == 'allelemethods':
        import matplotlib.pyplot as plt
        import numpy as np

        plt.figure()
        (fsa, fsa_index) = fsa_list[0]

        print('D: aligning FSA %s' % fsa.filename)
        try:
            fsa.align(_params)
        except LadderMismatchException:
            print(("LadderMismatch: %s\n") % fsa.filename)
        c = fsa.get_ladder_channel()

        # get ladder and times for peaks fit to ladder
        ladder_sizes = fsa.panel.get_ladder()['sizes']
        alleles = c.get_alleles()
        allele_sizes = [allele.rtime for allele in alleles]
        plt.plot(allele_sizes, ladder_sizes, 'p',
                 label='peaks matched to ladder steps')

        for method in [allelemethod.leastsquare, allelemethod.cubicspline,
                       allelemethod.localsouthern]:
            print("\nmethod: ", method)
            _params.allelemethod = method

            # call align again just to set the allelemethod
            print('D: aligning FSA %s' % fsa.filename)
            try:
                fsa.align(_params)
            except LadderMismatchException:
                print(("LadderMismatch: %s\n") % fsa.filename)

            func = fsa.allele_fit_func

            # plot fit of ladder scan times to base pairs
            x = np.arange(800, allele_sizes[-1] + 100)
            vecfunc = np.vectorize(func)
            print("vecfunc([1,2,3])=", vecfunc([1,2,3]))
            plt.plot(x, vecfunc(x)[0], label=method)

        plt.legend()
        plt.xlabel("peak scan times")
        plt.ylabel("# base pairs")
        plt.show()
def main(args):
    """Entry point: build Params from command-line flags, obtain the FSA
    list (from files or the database), then dispatch the fa-commands.

    Fix: the commit branch called ``do_facmd(args, fsa_list, dbh)`` while
    both non-commit paths call ``do_facmds(..., _params, ...)`` — the
    singular name and the missing ``_params`` argument look like a typo;
    it now calls ``do_facmds(args, fsa_list, _params, dbh)``.
    """
    if args.verbose != 0:
        set_verbosity(args.verbose)

    dbh = None

    # set parameter for baseline correction and allelemethod
    from fatools.lib.const import allelemethod, baselinemethod
    _params = params.Params()
    _params.baselinewindow = args.baselinewindow

    # map CLI strings to their enum values; unknown strings are rejected
    baseline_methods = {
        'none': baselinemethod.none,
        'median': baselinemethod.median,
        'minimum': baselinemethod.minimum,
    }
    if args.baselinemethod != "":
        if args.baselinemethod not in baseline_methods:
            raise NotImplementedError()
        _params.baselinemethod = baseline_methods[args.baselinemethod]

    allele_methods = {
        'leastsquare': allelemethod.leastsquare,
        'cubicspline': allelemethod.cubicspline,
        'localsouthern': allelemethod.localsouthern,
    }
    if args.allelemethod != "":
        if args.allelemethod not in allele_methods:
            raise NotImplementedError()
        _params.allelemethod = allele_methods[args.allelemethod]

    if args.nonladder_smoothing_window > 0:
        _params.nonladder.smoothing_window = args.nonladder_smoothing_window
        _params.nonladder.smoothing_order = args.nonladder_smoothing_order

    cerr('I: Aligning size standards...')

    if args.file or args.infile or args.indir:
        cverr(4, 'D: opening FSA file(s)')
        fsa_list = open_fsa(args, _params)
    elif dbh is None:
        cverr(4, 'D: connecting to database')
        dbh = get_dbhandler(args)
        fsa_list = get_fsa_list(args, dbh)

    cerr('I: obtained %d FSA' % len(fsa_list))

    if args.commit:
        with transaction.manager:
            do_facmds(args, fsa_list, _params, dbh)
            cerr('** COMMIT to database **')
    elif dbh:
        cerr(
            'WARNING ** running without database COMMIT! All changes will be discarded!'
        )
        if not (args.test or args.y):
            keys = input('Do you want to continue [y/n]? ')
            if not keys.lower().strip().startswith('y'):
                sys.exit(1)
        do_facmds(args, fsa_list, _params, dbh)
    else:
        do_facmds(args, fsa_list, _params)