Example #1
def setup_logging(log_level=None,
                  output_collection=None,
                  db_host=None,
                  log_name=None,
                  **kw):
    """Attach a MongoHandler to the root logger and return its Mongo client."""
    try:
        # 'output_collection' is expected as '<database>.<collection>'.
        database, collection = output_collection.split('.', 1)
        handler = MongoHandler(db_host, database, '.'.join(
            (collection, log_name)))
        # Map the textual level name (e.g. 'info') to its numeric value.
        log_level = getattr(logging, log_level.upper())
        handler.setLevel(log_level)
        fields = {
            'datetime': 'created',
            'process_id': 'process',
            'level': 'levelname',
            'message': 'message'
        }
        info = {'host': gethostname()}
        formatter = MongoFormatter(fields, info)
        handler.setFormatter(formatter)
        logging.root.addHandler(handler)
        logging.root.setLevel(log_level)
        return handler.getClient()
    except Exception:
        sys.stderr.write(' Unable to set up logging:\n' + format_exc())
        masterslave.exit(1)
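
A minimal usage sketch, not taken from the original source: the keyword values below are hypothetical, and it assumes this module's MongoHandler, MongoFormatter and masterslave dependencies are importable and that a MongoDB server is reachable.

# Hypothetical call; 'results.log' splits into database 'results' and
# collection prefix 'log' (log_name is appended to the collection).
client = setup_logging(log_level='info',
                       output_collection='results.log',
                       db_host='localhost',
                       log_name='worker')
logging.info('root logger now also writes to MongoDB')
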
Example #2
def istats(output_directory=None, mode='s', **kw):
    """Yield (gene, triad_rows, bootstrap_record) tuples read back from the
    JSON files in output_directory.  mode 's' skips genes whose '_g.json'
    output already exists; mode 'a' loads it so new records can be appended."""
    try:
        filenames = glob.iglob(os.path.join(output_directory, '*.json'))
    except Exception:
        logging.critical(' Unable to open ' + output_directory)
        masterslave.exit(1)
    for filename in filenames:
        try:
            if filename.endswith('_g.json'):
                continue
            gene = os.path.basename(filename[:-5])

            with open(filename) as f_file:
                f_stats = json.load(f_file)

            g_stats = []
            g_filepath = os.path.join(output_directory, gene + '_g.json')
            if os.path.exists(g_filepath):
                if mode == 's':
                    logging.info(' Skipping ' + gene + ': output exists')
                    continue
                elif mode == 'a':
                    with open(g_filepath) as g_file:
                        g_stats = json.load(g_file)

            # Group consecutive rows that share the same set of tip names,
            # one group per species triad.
            last_triad = None
            stats = []
            for f_row in f_stats:
                if 'tip_names' not in f_row:
                    continue
                triad = frozenset(f_row['tip_names'])
                if last_triad != triad:
                    last_triad = triad
                    stats.append([])
                stats[-1].append(f_row)

            if not g_stats:
                g_stats = [{} for _ in range(len(stats))]

            if len(g_stats) != len(stats):
                logging.error(' Skipping ' + filename + ':\n' + ' found ' +
                              str(len(stats)) + ' triad(s) and ' +
                              str(len(g_stats)) +
                              ' bootstrap records(s). Numbers should match.')
                continue

            # Pair each triad's rows with its corresponding bootstrap record.
            for triad_rows, g_record in zip(stats, g_stats):
                yield gene, triad_rows, g_record

        except Exception:
            logging.warning(' Skipping ' + filename + ':\n' + format_exc())
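
A hedged sketch of consuming the generator above; 'stats_out' is a hypothetical directory, and the shape of the yielded values follows the code itself: one list of fitted rows plus one bootstrap record per species triad.

for gene, triad_rows, g_record in istats(output_directory='stats_out', mode='a'):
    # Each iteration covers one species triad of one gene.
    print(gene, len(triad_rows), 'fitted rows,',
          'existing' if g_record else 'empty', 'bootstrap record')
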
Example #3
def setup_logging(log_level=None, log_file=None, **kw):
    """Attach an MPIFileHandler (when log_file is given) or a plain
    StreamHandler to the root logger."""
    try:
        if log_file:
            log_dir = os.path.dirname(log_file)
            masterslave.checkmakedirs(log_dir)
            handler = masterslave.MPIFileHandler(log_file)
        else:
            handler = logging.StreamHandler()
        log_level = getattr(logging, log_level.upper())
        handler.setLevel(log_level)
        hostpid = ''
        if masterslave.USING_MPI:
            # Tag each record with host:pid so MPI workers can be told apart.
            hostpid = gethostname() + ':' + str(os.getpid()) + ':'
        formatter = logging.Formatter('%(asctime)s:' + hostpid +
                                      '%(levelname)s:%(message)s')
        handler.setFormatter(formatter)
        logging.root.addHandler(handler)
        logging.root.setLevel(log_level)
    except Exception:
        sys.stderr.write(' Unable to set up logging:\n' + format_exc())
        masterslave.exit(1)
Example #4
def write_files(gene, f_stats, g_stats, output_directory, log_file):
    """Write f_stats to '<gene>.json' and g_stats to '<gene>_g.json' in
    output_directory, preserving 'log_file' records from previous runs."""
    try:
        f_stats.append({'log_file': log_file})
        filepath = os.path.join(output_directory, gene + '.json')
        # Carry over 'log_file' records written by earlier runs of this gene.
        with open(filepath) as infile:
            old_stats = json.load(infile)
            for row in old_stats:
                if 'log_file' in row:
                    f_stats.append(row)
        with open(filepath, 'w') as outfile:
            json.dump(f_stats, outfile)
        filepath = os.path.join(output_directory, gene + '_g.json')
        with open(filepath, 'w') as outfile:
            json.dump(g_stats, outfile)
    except Exception:
        if gene:
            logging.error(' Problem collecting output for ' + gene + ':\n' +
                          format_exc())
        else:
            logging.critical(' Null gene in write_files()')
            masterslave.exit(1)
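
A speculative call sketch; the gene name, stats lists and file paths are all made up, and '<output_directory>/<gene>.json' must already exist because the function re-reads it before overwriting.

write_files('ENSG00000139618',            # hypothetical gene name
            f_stats=[],                   # rows produced by the analysis step
            g_stats=[],
            output_directory='stats_out',
            log_file='stats_out/run.log')
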
Example #5
def itriads(input_directory=None,
            codon_position=-1,
            output_directory=None,
            force_recalculation=False,
            triad_file=None,
            num_range=None,
            **kw):
    """Yield (gene, alignment) pairs for every valid species triad found in
    the FASTA (or gzipped FASTA) files of input_directory, optionally
    restricted to a single codon position."""
    try:
        filenames = glob.glob(os.path.join(input_directory, '*.fasta*'))
        if num_range:
            filenames = filenames[num_range]
    except Exception:
        logging.critical(' Unable to open input directory:\n' + format_exc())
        masterslave.exit(1)

    try:
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)  # only executed by master
    except Exception:
        logging.critical(' Unable to open output directory:\n' + format_exc())
        masterslave.exit(1)

    for filename in filenames:
        try:
            # Accept both plain and gzip-compressed FASTA input.
            if filename.endswith('.fasta'):
                gene = os.path.basename(filename[:-6])
                with open(filename) as fastafile:
                    fastadata = fastafile.read()
            elif filename.endswith('.fasta.gz'):
                gene = os.path.basename(filename[:-9])
                with GzipFile(filename) as fastafile:
                    fastadata = fastafile.read()
            else:
                continue

            out_filepath = os.path.join(output_directory, gene + '.json')
            if not force_recalculation and os.path.exists(out_filepath):
                logging.info(' Skipping ' + gene + ': output exists')
                continue

            sequences = LoadSeqs(data=fastadata)
            if codon_position > 0:
                # Keep every third column, starting at the requested
                # (1-based) codon position.
                c = codon_position
                indices = [(i, i + 1) for i in range(c - 1, len(sequences), 3)]
                pos3 = sequences.addFeature('pos3', 'pos3', indices)
                sequences = pos3.getSlice()
            if triad_file:
                # One whitespace-separated triad of taxon names per line.
                with open(triad_file) as f:
                    triads = [line.split() for line in f]
            else:
                triads = combinations(sequences.getSeqNames(), 3)

            num_triads = 0
            for triad in triads:
                for taxon in triad:
                    if taxon not in sequences.Names:
                        logging.info(' Skipping ' + '/'.join(triad) + ' in ' +
                                     gene + ': ' + taxon + ' is missing')
                        break
                else:
                    # for/else: reached only when no taxon was missing above.
                    num_triads += 1
                    sa = sequences.takeSeqs(triad)
                    # Keep only columns made up of unambiguous DNA characters.
                    sa = sa.filtered(lambda x: set(''.join(x)) <= set(DNA))
                    if len(sa) == 0:
                        logging.info(' Skipping ' + '/'.join(triad) + ' in ' +
                                     gene +
                                     ': filtered alignment has length zero')
                        continue
                    yield gene, sa

            if num_triads == 0:
                logging.info(' Skipping ' + gene + ': found no valid triads')

        except Exception:
            logging.warning(' Skipping ' + filename + ':\n' + format_exc())
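
A hedged driver sketch for the generator above (directory names are hypothetical); it relies only on attributes the function itself already uses, namely Names and len().

for gene, sa in itriads(input_directory='fasta_in',
                        output_directory='stats_out',
                        codon_position=3):
    print(gene, '/'.join(sa.Names), 'alignment length:', len(sa))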