Code example #1
File: plot.py  Project: edawine/fatools
def check_and_prepare_pdf(plot_file):
    """
    Check if format is supported by matplotlib, then determine if
    PdfPages object needs to be prepared for plotting to pdf.

    Input
    -----
    plot_file: string of plot file name and format

    Output
    ------
    plot_file: PdfPages object wrapping plot_file if the format is '.pdf';
        otherwise plot_file is returned unchanged
    """
    if plot_file is not None:
        plot_file_ext = splitext(plot_file)[-1]
        if plot_file_ext == '.pdf':
            plot_file = PdfPages(plot_file)
        else:
            try:
                plt.savefig(plot_file)
            except ValueError:
                cerr('E: Format {} is not supported!'.format(plot_file_ext))
                cexit('Exiting...')

    return plot_file
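
A minimal usage sketch (not part of the project; the output file name is made up): when the name ends in '.pdf', the returned PdfPages object collects the plotted figures and must be closed to finalize the file.

from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

plot_file = check_and_prepare_pdf('peaks.pdf')   # hypothetical output name
plt.plot([1, 2, 3], [2, 4, 8])
if isinstance(plot_file, PdfPages):
    plot_file.savefig()   # append the current figure as a page
    plot_file.close()     # finalize the multi-page PDF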
Code example #2
File: fautil.py  Project: edawine/fatools
def get_traces(args, dbh):

    traces = []

    if dbh is None:
        # get from infile
        infile = args.file
        if infile is False:
            cexit('E - Please provide a filename or SQLite database path')

        abspath = os.path.abspath( args.file )

        if abspath in cache_traces:
            traces.append((abspath, cache_traces[abspath]))

        else:
            from fatools.lib.fautil.traceio import read_abif_stream
            with open( abspath, 'rb') as instream:
                t = read_abif_stream(instream)
                cache_traces[abspath] = t
                traces.append((abspath, t))

    else:
        pass

    return traces
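
A usage sketch of the file-based branch (the trace file name is made up; get_traces assumes a module-level cache_traces dict and the read_abif_stream reader imported above):

from types import SimpleNamespace

# hypothetical ABIF trace file; passing dbh=None selects the file-reading branch
args = SimpleNamespace(file='run01_A01.fsa')
for path, trace in get_traces(args, dbh=None):
    print(path, trace)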
Code example #3
File: convert.py  Project: edawine/fatools
def do_genemapper2tab(args, dbh):

    species = None
    if args.species: species = args.species

    for infile in args.infiles:

        sample_set = defaultdict(list)
        csv_in = csv.DictReader( open(infile) )
        assay_list = {}

        for row in csv_in:
            assay = row['Sample File']
            sample = row['Sample Name']
            run_name = row['Run Name']
            panel = row['Panel']
            marker = row['Marker']

            if assay in assay_list:
                if assay_list[assay] != run_name:
                    cexit('Inconsistent or duplicate FSA file name: %s' % assay)
            else:
                assay_list[assay] = run_name

            token = (sample, assay, panel)
            sample_set[token].append( marker )


        outfile = open(infile + '.tab', 'w')
        outfile.write('SAMPLE\tASSAY\tPANEL\tOPTIONS\n')

        for token in sorted(sample_set.keys()):
            sample, assay, panel = token
            markers = sample_set[token]

            db_panel = dbh.get_panel(panel)
            s_panel_markers = set( x.upper() for x in db_panel.get_marker_codes())
            s_assay_markers = set(
                    ('%s/%s' % (species, x) if (species and '/' not in x) else x).upper()
                    for x in markers )

            excludes = s_panel_markers - s_assay_markers
            if s_assay_markers - s_panel_markers:
                cexit('ERROR inconsistent marker(s) for sample %s assay %s: %s' % 
                    (sample, assay, str(s_assay_markers - s_panel_markers)))

            if excludes:
                excludes = 'exclude=%s' % ','.join(excludes)
            else:
                excludes = ''

            outfile.write('%s\t%s\t%s\t%s\n' % (sample, assay, panel, excludes))

        outfile.close()
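
do_genemapper2tab expects each input file to be a GeneMapper export providing at least the five columns read in the loop above, and writes its grouped output next to the input as <infile>.tab; a hypothetical fragment (all file, sample, run, panel, and marker names are made up, only the column headers matter):

import csv, io

genemapper_csv = """Sample File,Sample Name,Run Name,Panel,Marker
run01_A01.fsa,S-001,run01,PANEL-A,MS1
run01_A01.fsa,S-001,run01,PANEL-A,MS2
"""

for row in csv.DictReader(io.StringIO(genemapper_csv)):
    print(row['Sample Name'], row['Sample File'], row['Panel'], row['Marker'])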
Code example #4
File: analyze.py  Project: edawine/fatools
def do_export(args, dbh):

    from fatools.lib.analytics.export import export

    query = get_query( args, dbh )
    analytical_sets = query.get_filtered_analytical_sets()
    if analytical_sets.total_samples <= 0:
        cexit('ERR - query does not yield any sample data')
    else:
        cerr('INFO - total sample number: %d' % analytical_sets.total_samples)
    output = export( analytical_sets, dbh, outfile = args.outfile, format = args.outformat )
    cout('Done.')
Code example #5
File: dbmgr.py  Project: edawine/fatools
def do_viewbin(args, dbh):

    if not args.marker:
        cexit('ERR - please provide marker code')

    markers = [ dbh.get_marker(code) for code in args.marker.split(',') ]
    batch = dbh.get_batch( args.batch or 'default')

    for m in markers:
        cout('Marker: %s' % m.label)
        cout('    Bin   Mean   25%P   75%P   Width')
        cout('  ====================================')
        for binset in m.get_bin(batch).sortedbins:
            cout('   %3d  %5.2f  %5.2f  %5.2f  %4.2f' %
                    (binset[0], binset[1], binset[2], binset[3], binset[3] - binset[2]))
Code example #6
def do_viewbin(args, dbh):

    if not args.marker:
        cexit('ERR - please provide marker code')

    markers = [dbh.get_marker(code) for code in args.marker.split(',')]
    batch = dbh.get_batch(args.batch or 'default')

    for m in markers:
        cout('Marker: %s' % m.label)
        cout('    Bin   Mean   25%P   75%P   Width')
        cout('  ====================================')
        for binset in m.get_bin(batch).sortedbins:
            cout('   %3d  %5.2f  %5.2f  %5.2f  %4.2f' %
                 (binset[0], binset[1], binset[2], binset[3],
                  binset[3] - binset[2]))
Code example #7
File: dbmgr.py  Project: edawine/fatools
def do_initbin(args, dbh):

    if not args.marker:
        cexit('ERR - please provide marker code')

    if '-' not in args.range:
        cexit('ERR - please provide range for bin')

    if not args.batch:
        args.batch = 'default'
    batch = dbh.get_batch( args.batch )

    markers = [ dbh.get_marker(code) for code in args.marker.split(',') ]
    ranges = args.range.split('-')
    start_range = int(ranges[0])
    end_range = int(ranges[1])

    print(markers)
    for m in markers:
        m.initbins(start_range, end_range, batch)
        cerr('INFO  - bin for marker %s with batch %s has been created.' % (m.label, batch.code))
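
A sketch of the expected arguments (the marker codes, range string, and the open database handle dbh are assumptions for illustration): --marker is a comma-separated list and --range is a start-end pair.

from types import SimpleNamespace

# hypothetical invocation; dbh must be an already-open fatools database handle
args = SimpleNamespace(marker='MS1,MS2', range='200-400', batch=None)
do_initbin(args, dbh)   # creates bins over 200-400 for MS1 and MS2 in batch 'default'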
Code example #8
File: dbmgr.py  Project: nebiolabs/fatools
def do_initbin(args, dbh):

    if not args.marker:
        cexit('ERR - please provide marker code')

    if '-' not in args.range:
        cexit('ERR - please provide range for bin')

    if not args.batch:
        args.batch = 'default'
    batch = dbh.get_batch(args.batch)

    markers = [dbh.get_marker(code) for code in args.marker.split(',')]
    ranges = args.range.split('-')
    start_range = int(ranges[0])
    end_range = int(ranges[1])

    print(markers)
    for m in markers:
        m.initbins(start_range, end_range, batch)
        cerr('INFO  - bin for marker %s with batch %s has been created.' %
             (m.label, batch.code))
Code example #9
File: cmds.py  Project: edawine/fatools
def get_fsa_list( args, dbh ):
    """
    Get FSA instances from the database based on the parameters in args.
    """

    if not args.batch:
        cexit('ERR: using database requires --batch argument!', 1)

    batch = dbh.get_batch( args.batch )
    if not batch:
        cexit('ERR: batch %s not found!' % args.batch, 1)

    samples = []
    if args.sample:
        samples = args.sample.split(',')

    fsas = []
    if args.fsa:
        fsas = args.fsa.split(',')

    panels = []
    if args.panel:
        panels = args.panel.split(',')

    markers = []
    if args.marker:
        markers = dbh.get_markers(args.marker.split(','))

    fsa_list = []
    for sample in batch.samples:
        if samples and sample.code not in samples: continue
        for assay in sample.assays:
            if fsas and assay.filename not in fsas: continue
            if panels and assay.panel.code not in panels: continue
            fsa_list.append( (assay, sample.code) )

    cerr('I: number of assays to be processed: %d' % len(fsa_list))
    return fsa_list
Code example #10
def get_fsa_list(args, dbh):
    """
    Get FSA instances from the database based on the parameters in args.
    """

    if not args.batch:
        cexit('ERR: using database requires --batch argument!', 1)

    batch = dbh.get_batch(args.batch)
    if not batch:
        cexit('ERR: batch %s not found!' % args.batch, 1)

    samples = []
    if args.sample:
        samples = args.sample.split(',')

    fsas = []
    if args.fsa:
        fsas = args.fsa.split(',')

    panels = []
    if args.panel:
        panels = args.panel.split(',')

    markers = []
    if args.marker:
        markers = dbh.get_markers(args.marker.split(','))

    fsa_list = []
    for sample in batch.samples:
        if samples and sample.code not in samples: continue
        for assay in sample.assays:
            if fsas and assay.filename not in fsas: continue
            if panels and assay.panel.code not in panels: continue
            fsa_list.append((assay, sample.code))

    cerr('I: number of assays to be processed: %d' % len(fsa_list))
    return fsa_list
Code example #11
File: dbmgr.py  Project: nebiolabs/fatools
def do_initsample(args, dbh):

    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    name, ext = os.path.splitext(args.infile)

    if ext in ['.csv', '.tab', '.tsv']:

        delim = ',' if ext == '.csv' else '\t'

        dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
            open(args.infile), with_report=True, delimiter=delim)

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ['.json', '.yaml']:
        payload = yaml.load(open(args.infile))
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    inserted = 0
    updated = 0

    # get default location and subject first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)
    #null_subject = dbh.search_subject('null', auto=True) ## <- this shouldn't be here !!

    session = dbh.session()

    with session.no_autoflush:

        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]

            db_sample = b.search_sample(sample_code)

            if not db_sample:
                db_sample = b.add_sample(sample_code)
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location
                #db_sample.subject = null_subject
                #print(d_sample)
                #dbh.session().flush( [db_sample] )

            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update(d_sample)
            session.flush([db_sample])

    cout('INFO - inserted new %d sample(s), updated %d sample(s)' %
         (inserted, updated))

    return
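    # NOTE: the code below is unreachable; it sits after the return statement above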

    inrows = csv.reader(
        open(args.infile),
        delimiter=',' if args.infile.endswith('.csv') else '\t')

    next(inrows)  # discard the 1st line

    counter = 0
    for row in inrows:
        s = b.add_sample(row[0])
        counter += 1
        cout('INFO - sample: %s added.' % s.code)

    cout('INFO - number of new sample(s): %d' % counter)
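
For the '.json'/'.yaml' branch, the payload only needs the two keys read above; a hypothetical minimal example (the per-sample fields that db_sample.update() consumes depend on the Sample model and are left empty here):

import yaml

payload = yaml.safe_load("""
codes: [S-001, S-002]
samples:
  S-001: {}
  S-002: {}
""")
print(payload['codes'], list(payload['samples']))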
Code example #12
File: dbmgr.py  Project: edawine/fatools
def do_initsample(args, dbh):

    if not args.batch:
        cerr('ERR: batch code must be supplied!')
        sys.exit(1)

    b = dbh.Batch.search(args.batch, dbh.session)
    cout('INFO - using batch code: %s' % b.code)

    name, ext = os.path.splitext( args.infile )

    if ext in [ '.csv', '.tab', '.tsv' ]:

        delim = ',' if ext == '.csv' else '\t'

        dict_samples, errlog, sample_codes = b.get_sample_class().csv2dict(
                open(args.infile), with_report=True, delimiter = delim )

        if dict_samples is None:
            cout('Error processing sample info file')
            cout('\n'.join(errlog))
            cexit('Terminated!')

    elif ext in ['.json', '.yaml']:
        payload = yaml.load( open(args.infile) )
        sample_codes = payload['codes']
        dict_samples = payload['samples']

    inserted=0
    updated=0

    # get default location and subject first (to satisfy RDBMS constraints)
    null_location = dbh.search_location(auto=True)
    #null_subject = dbh.search_subject('null', auto=True) ## <- this shouldn't be here !!

    session = dbh.session()

    with session.no_autoflush:

        for sample_code in sample_codes:
            d_sample = dict_samples[sample_code]

            db_sample = b.search_sample( sample_code )

            if not db_sample:
                db_sample = b.add_sample( sample_code )
                inserted += 1
                cout('INFO - sample: %s added.' % db_sample.code)
                db_sample.location = null_location
                #db_sample.subject = null_subject
                #print(d_sample)
                #dbh.session().flush( [db_sample] )

            else:
                cout('INFO - sample: %s being updated...' % db_sample.code)
                updated += 1

            db_sample.update( d_sample )
            session.flush( [db_sample] )


    cout('INFO - inserted new %d sample(s), updated %d sample(s)' %
            (inserted, updated))

    return
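    # NOTE: the code below is unreachable; it sits after the return statement above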


    inrows = csv.reader( open(args.infile),
                delimiter = ',' if args.infile.endswith('.csv') else '\t' )

    next(inrows)    # discard the 1st line

    counter = 0
    for row in inrows:
        s = b.add_sample( row[0] )
        counter += 1
        cout('INFO - sample: %s added.' % s.code)

    cout('INFO - number of new sample(s): %d' % counter)
Code example #13
File: cmds.py  Project: edawine/fatools
def open_fsa( args ):
    """ open FSA file(s) and prepare fsa instances
        requires: args.file, args.panel, args.panelfile
    """

    from fatools.lib.fileio.models import Marker, Panel, FSA

    if not args.panel:
        cexit('ERR: using FSA file(s) requires --panel argument!')

    if not args.panelfile:
        cerr('WARN: using default built-in panels')
        Panel.upload(params.default_panels)
    else:
        with open(args.panelfile) as f:
            # open a YAML file that describes panel sets
            Panel.upload(yaml.load(f))

    if not args.markerfile:
        Marker.upload(params.default_markers)
    else:
        raise NotImplementedError()

    panel = Panel.get_panel(args.panel)
    fsa_list = []
    index = 1

    # prepare caching
    cache_path = None
    if not args.no_cache:
        cache_path = os.path.join(os.path.expanduser('~'), '.fatools_caches', 'channels')
        if args.cache_path is not None:
            cache_path = os.path.join(args.cache_path, '.fatools_caches', 'channels')
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)

    if args.file:
        for fsa_filename in args.file.split(','):
            fsa_filename = fsa_filename.strip()
            fsa = FSA.from_file(fsa_filename, panel, cache=not args.no_cache,
                                cache_path=cache_path)
            # yield (fsa, str(i))
            fsa_list.append( (fsa, str(index)) )
            index += 1

    elif args.infile:

        with open(args.infile) as f:
            buf, delim = detect_buffer( f.read() )
        inrows = csv.DictReader( StringIO(buf), delimiter=delim )
        line = 1
        index = 1

        for r in inrows:

            line += 1

            fsa_filename = r['FILENAME'].strip()
            if fsa_filename.startswith('#'):
                continue

            if r.get('OPTIONS', None):
                options = tokenize( r['OPTIONS'] )
            else:
                options = None

            panel_code = r.get('PANEL', None) or args.panel
            panel = Panel.get_panel(panel_code)

            fsa = FSA.from_file(fsa_filename, panel, options, cache=not args.no_cache,
                                cache_path=cache_path)
            if 'SAMPLE' in inrows.fieldnames:

                # yield (fsa, r['SAMPLE'])
                fsa_list.append( (fsa, r['SAMPLE']) )
            else:

                # yield (fsa, str(index))
                fsa_list.append( (fsa, str(index)) )
                index += 1

    return fsa_list
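
The args.infile branch reads a delimited table whose column names match the keys accessed above (FILENAME, SAMPLE, PANEL, OPTIONS); a hypothetical tab-delimited example with made-up file, sample, and panel names:

import csv
from io import StringIO

infile_text = (
    "FILENAME\tSAMPLE\tPANEL\tOPTIONS\n"
    "run01_A01.fsa\tS-001\tPANEL-A\t\n"
    "run01_B01.fsa\tS-002\tPANEL-A\texclude=MS2\n"
)
for r in csv.DictReader(StringIO(infile_text), delimiter='\t'):
    print(r['FILENAME'], r['SAMPLE'], r['PANEL'], r.get('OPTIONS'))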
Code example #14
def open_fsa(args, _params):
    """ open FSA file(s) and prepare fsa instances
        requires: args.file, args.panel, args.panelfile
    """

    from fatools.lib.fileio.models import Marker, Panel, FSA

    if not args.panel:
        cexit('ERR: using FSA file(s) requires --panel argument!')

    if not args.panelfile:
        cerr('WARN: using default built-in panels')
        Panel.upload(params.default_panels)
    else:
        with open(args.panelfile) as f:
            # open a YAML file that describes panel sets
            import yaml
            Panel.upload(yaml.load(f))

    if not args.markerfile:
        Marker.upload(params.default_markers)
    else:
        raise NotImplementedError()

    panel = Panel.get_panel(args.panel)
    fsa_list = []
    index = 1

    # prepare caching
    if args.use_cache:
        if not os.path.exists('.fatools_caches/channels'):
            os.makedirs('.fatools_caches/channels')

    if args.file:
        for fsa_filename in args.file.split(','):
            fsa_filename = fsa_filename.strip()

            if args.indir != "":
                filename = args.indir + "/" + fsa_filename
            else:
                filename = fsa_filename

            fsa = FSA.from_file(filename,
                                panel,
                                _params,
                                cache=not args.no_cache)
            # yield (fsa, str(i))
            fsa_list.append((fsa, str(index)))
            index += 1

    elif args.infile:

        with open(args.infile) as f:
            buf, delim = detect_buffer(f.read())
        inrows = csv.DictReader(StringIO(buf), delimiter=delim)
        line = 1
        index = 1

        for r in inrows:

            line += 1

            fsa_filename = r['FILENAME'].strip()
            if fsa_filename.startswith('#'):
                continue

            if r.get('OPTIONS', None):
                options = tokenize(r['OPTIONS'])
            else:
                options = None

            panel_code = r.get('PANEL', None) or args.panel
            panel = Panel.get_panel(panel_code)

            fsa = FSA.from_file(fsa_filename,
                                panel,
                                _params,
                                options,
                                cache=not args.no_cache)
            if 'SAMPLE' in inrows.fieldnames:

                # yield (fsa, r['SAMPLE'])
                fsa_list.append((fsa, r['SAMPLE']))
            else:

                # yield (fsa, str(index))
                fsa_list.append((fsa, str(index)))
                index += 1

    elif args.indir:
        import glob
        for fsa_filename in sorted(glob.glob(args.indir + "/*.fsa")):

            fsa_filename = fsa_filename.strip()
            fsa = FSA.from_file(fsa_filename,
                                panel,
                                _params,
                                cache=not args.no_cache)
            # yield (fsa, str(i))
            fsa_list.append((fsa, str(index)))
            index += 1

    return fsa_list