Ejemplo n.º 1
0
def check_arguments(args):
    """Validate the parsed command-line arguments and abort on any error.

    The messages returned by the ``check_*`` helpers are gathered in one
    list. When ``args.input_format`` is not ``'genbank'``, each of the
    option pairs ``-n/-N``, ``-m/-M`` and ``-t/-T`` that was supplied
    (truthy value or non-``None`` path) contributes one extra message.
    ``args.comparison`` is set to ``COMPARISON`` when it is ``None``.

    If at least one message was collected, every message is printed with
    ``print_stderr(..., prefix='ERROR')`` and the program exits with
    status 1. Otherwise the function returns ``None``.
    """
    errors = []
    errors += check_inputs(args, stdin_allowed=True)
    errors += check_outputs(args, stdout_allowed=True)
    errors += check_cpu_count(args)
    errors += check_file_arg(args.seqids_path, none_allowed=True,
                             prefix='-S/--seqids_path')
    errors += check_file_arg(args.descriptions_path, none_allowed=True,
                             prefix='-D/--descriptions_path')
    errors += check_file_arg(args.names_path, none_allowed=True,
                             prefix='-N/--names_path')
    errors += check_file_arg(args.mol_types_path, none_allowed=True,
                             prefix='-M/--mol_types_path')
    errors += check_file_arg(args.taxids_path, none_allowed=True,
                             prefix='-T/--taxids_path')
    if args.input_format != 'genbank':
        values = [args.names, args.mol_types, args.taxids]
        paths = [args.names_path, args.mol_types_path, args.taxids_path]
        prefixes = ['-n/-N', '-m/-M', '-t/-T']
        for value, path, prefix in zip(values, paths, prefixes):
            if value or (path is not None):
                # append(), not +=: extending a list with a string would
                # add one element per character of the message.
                errors.append('{}: These options require input file(s) '
                              'in GenBank format.'.format(prefix))
    if args.comparison is None:
        args.comparison = COMPARISON
    if errors:
        for error in errors:
            print_stderr(error, prefix='ERROR')
        sys.exit(1)
Ejemplo n.º 2
0
def check_arguments(args):
    """Template argument checker: collect messages, report them, exit.

    Verifications are meant to add their messages to the local list. When
    the list is non-empty, each message is printed with
    ``print_stderr(..., prefix='ERROR')`` and the program exits with
    status 1; otherwise the function returns ``None``.
    """
    problems = []
    # Insert argument verifications here
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 3
0
def print_missing_seqids(seqids_found):
    """Report the identifiers whose entry in ``seqids_found`` is falsy.

    ``seqids_found`` is iterated for its keys and indexed by each key.
    Nothing is printed when every entry is truthy (or there are none).
    """
    missing = [seqid for seqid in seqids_found if not seqids_found[seqid]]
    if missing:
        message = '{:d} identifier(s) not found: {}'.format(
            len(missing), ', '.join(missing))
        print_stderr(message)
Ejemplo n.º 4
0
def import_annotations(annot_paths,
                       types,
                       levels,
                       obsolete,
                       main_ids,
                       add_ancestors,
                       ancestors,
                       min_level=MIN_LEVEL,
                       max_level=MAX_LEVEL):
    """Read tab-separated annotation files and sort annotations by type.

    Each non-empty row supplies a sequence identifier (first column) and
    an annotation identifier (second column). Identifiers starting with
    ``EC:`` are stored in the ``ec`` dictionary. Identifiers starting with
    ``GO:`` are looked up in ``types``; unknown ones are reported and
    skipped. A GO identifier is also skipped when ``obsolete[go_id]`` is
    truthy, when ``levels[go_id]`` is below ``min_level``, or when
    ``max_level`` is not ``None`` and ``levels[go_id]`` exceeds it. The
    remaining identifiers are replaced by ``main_ids[go_id]`` when such an
    entry exists and passed to ``add_annotation`` together with the
    dictionary (and ancestor dictionary) matching their type.

    Returns:
        The tuple ``(bp, mf, cc, ec, bp_anc, mf_anc, cc_anc)``. The three
        ``*_anc`` items are dictionaries when ``add_ancestors`` is truthy
        and ``None`` otherwise.
    """
    bp, mf, cc, ec = {}, {}, {}, {}
    bp_anc, mf_anc, cc_anc = None, None, None
    if add_ancestors:
        bp_anc, mf_anc, cc_anc = {}, {}, {}

    for annot_path in annot_paths:
        with open_file(annot_path) as annot_file:
            for row in csv.reader(annot_file, dialect='excel-tab'):
                if not row:
                    # csv.reader yields an empty list for a blank line;
                    # indexing it would raise IndexError.
                    continue
                seqid = row[0]
                go_id = row[1]

                if not go_id.startswith(('GO:', 'EC:')):
                    print_stderr(
                        'ERROR: Unknown annotation type: {}.'.format(go_id))
                    continue

                if go_id.startswith('EC:'):
                    updict_add_to_set(ec, seqid, go_id)
                    continue

                try:
                    go_type = types[go_id]
                except KeyError:
                    print_stderr(
                        'ERROR: Annotation not found: {}.'.format(go_id))
                    continue

                if obsolete[go_id]:
                    continue
                if levels[go_id] < min_level:
                    continue
                if max_level is not None and levels[go_id] > max_level:
                    continue
                # Use the entry from main_ids when there is one; otherwise
                # keep the identifier as read.
                try:
                    go_id = main_ids[go_id]
                except KeyError:
                    pass

                if go_type == 'biological_process':
                    add_annotation(bp, seqid, go_id, bp_anc, ancestors)
                elif go_type == 'molecular_function':
                    add_annotation(mf, seqid, go_id, mf_anc, ancestors)
                elif go_type == 'cellular_component':
                    add_annotation(cc, seqid, go_id, cc_anc, ancestors)

    return bp, mf, cc, ec, bp_anc, mf_anc, cc_anc
Ejemplo n.º 5
0
def check_arguments(args):
    """Check the input and output path arguments, exiting on any error.

    The messages returned by ``check_file_arg`` for ``args.input_path``
    and ``args.output_path`` are collected. If there are any, each one is
    printed with ``print_stderr(..., prefix='ERROR')`` and the program
    exits with status 1.
    """
    problems = []
    problems.extend(check_file_arg(args.input_path, stdio_allowed=True))
    problems.extend(check_file_arg(
        args.output_path, mode='w', stdio_allowed=True,
        prefix='-o/--output_path'))
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 6
0
def export_removed_list(bad_records, removed_path=REMOVED_PATH):
    """Report the bad records and optionally write them to a file.

    When ``bad_records`` is empty a single notice is printed and nothing
    is written. Otherwise the sorted records, one per line, are reported
    through ``print_stderr`` and, unless ``removed_path`` is ``None``,
    written to that path.
    """
    if not bad_records:
        print_stderr('No bad record found.', time=False)
        return

    count = len(bad_records)
    listing = '\n'.join(sorted(bad_records))
    if count == 1:
        report = '1 bad record found: {}'.format(listing)
    else:
        report = '{:d} bad records found:\n{}'.format(count, listing)
    print_stderr(report, time=False)

    if removed_path is None:
        return
    with open(removed_path, 'w') as removed_file:
        removed_file.write(listing)
Ejemplo n.º 7
0
def check_arguments(args):
    """Check the inputs and the two count-file options, exiting on error.

    Messages returned by ``check_sequtils_inputs`` and by
    ``check_file_arg`` for ``args.seq_counts_path`` and
    ``args.base_counts_path`` are collected. If there are any, each one is
    printed with ``print_stderr(..., prefix='ERROR')`` and the program
    exits with status 1.
    """
    problems = []
    problems.extend(check_sequtils_inputs(args, stdin_allowed=True))
    count_options = (
        ('seq_counts_path', '-S/--seq_counts_path'),
        ('base_counts_path', '-C/--base_counts_path'),
    )
    for attribute, option in count_options:
        # The attribute is read just before its check, one option at a time
        problems.extend(check_file_arg(
            getattr(args, attribute), mode='w', none_allowed=True,
            prefix=option))
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 8
0
def check_arguments(args):
    """Check inputs, outputs, CPU count and the removed-records path.

    Messages returned by the ``check_*`` helpers are collected. When
    ``args.no_output`` is truthy, ``args.output_paths`` is reset to
    ``None``. If any message was collected, each one is printed with
    ``print_stderr(..., prefix='ERROR')`` and the program exits with
    status 1.
    """
    problems = []
    problems.extend(check_inputs(args))
    problems.extend(check_outputs(args, stdout_allowed=True))
    problems.extend(check_cpu_count(args))
    problems.extend(check_file_arg(
        args.removed_path, mode='w', none_allowed=True,
        prefix='-r/--removed'))
    if args.no_output:
        args.output_paths = None
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 9
0
def check_arguments(args):
    """Check the annotation, tree, seqids and output paths.

    Messages returned by ``check_file_arg`` for every path in
    ``args.annot_paths`` and for ``args.tree_path``, ``args.seqids_path``
    and ``args.output_path`` are collected. If there are any, each one is
    printed with ``print_stderr(..., prefix='ERROR')`` and the program
    exits with status 1.
    """
    problems = []
    for annot_path in args.annot_paths:
        problems.extend(check_file_arg(annot_path, stdio_allowed=True))
    problems.extend(check_file_arg(args.tree_path, prefix='-t/--tree_path'))
    problems.extend(check_file_arg(
        args.seqids_path, none_allowed=True, prefix='-s/--seqids_path'))
    problems.extend(check_file_arg(
        args.output_path, mode='w', stdio_allowed=True,
        prefix='-o/--output_path'))
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 10
0
def check_arguments(args):
    """Check the inputs and the index output path, exiting on error.

    When ``args.output_path`` is ``None`` it is given a default:
    ``'index.idx'`` if there is more than one input path, otherwise the
    base name of the first input path followed by ``'.idx'``. Messages
    returned by ``check_inputs`` and ``check_file_arg`` are collected; if
    there are any, each one is printed with
    ``print_stderr(..., prefix='ERROR')`` and the program exits with
    status 1.
    """
    problems = []
    problems.extend(check_inputs(args, zip_ext=BGZIP_EXT))
    if args.output_path is None:
        if len(args.input_paths) > 1:
            default_path = 'index.idx'
        else:
            default_path = os.path.basename(args.input_paths[0]) + '.idx'
        args.output_path = default_path
    problems.extend(check_file_arg(
        args.output_path, mode='w', prefix='-o/--output_path'))

    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 11
0
def get_search_settings(args):
    """Build the search criteria and comparison settings from ``args``.

    Each criterion is obtained with ``get_values`` from an inline option
    and its companion path option. If every criterion is falsy, an error
    message is printed and the program exits with status 1.

    Returns:
        The tuple ``(seqids, descriptions, names, mol_types, taxids,
        comparison_func, simple_case)``, where ``comparison_func`` comes
        from ``set_comparison_func(args.comparison)`` and ``simple_case``
        from ``check_if_simple_case``.
    """
    seqids = get_values(args.seqids, args.seqids_path)
    descriptions = get_values(args.descriptions, args.descriptions_path)
    names = get_values(args.names, args.names_path)
    mol_types = get_values(args.mol_types, args.mol_types_path)
    taxids = get_values(args.taxids, args.taxids_path)
    if not (seqids or descriptions or names or mol_types or taxids):
        print_stderr('ERROR: No search criterion specified.')
        # This is an error exit, so report a non-zero status to the caller
        # (status 0 would signal success to the shell).
        sys.exit(1)

    simple_case = check_if_simple_case(
        seqids, descriptions, names, mol_types, taxids,
        args.inverse, args.comparison)

    comparison_func = set_comparison_func(args.comparison)
    return (seqids, descriptions, names, mol_types, taxids,
            comparison_func, simple_case)
Ejemplo n.º 12
0
def extract_records_by_seqid(seqids, input_paths=INPUT_PATHS,
                             input_format=SEQFILE_FORMAT,
                             index_path=INDEX_PATH, output_path=OUTPUT_PATH,
                             output_format=SEQFILE_FORMAT):
    """Write the indexed records matching ``seqids`` to ``output_path``.

    Each identifier is fetched with ``get_indexed_record``. Identifiers
    for which that lookup raises ``KeyError`` are remembered and reported
    in a single message once the output file has been closed.
    """
    records_list = make_indexed_records_list(
        input_paths, index_path, input_format)
    missing = []
    with open_file(output_path, 'w') as output_file:
        for seqid in seqids:
            try:
                record = get_indexed_record(seqid, records_list)
            except KeyError:
                missing.append(seqid)
                continue
            write_records(record, output_file, output_format)
    if missing:
        message = '{:d} identifier(s) not found: {}'.format(
            len(missing), ', '.join(missing))
        print_stderr(message)
Ejemplo n.º 13
0
def check_arguments(args):
    """Check outputs, CPU count, length bounds and sequence count.

    ``args.max_length`` falls back to ``args.min_length`` when it is
    ``None``; otherwise it is checked with ``args.min_length`` as its
    lower bound. ``args.seq_type`` falls back to ``SEQ_TYPE`` when it is
    ``None``. If any helper returned messages, each one is printed with
    ``print_stderr(..., prefix='ERROR')`` and the program exits with
    status 1.
    """
    problems = []
    problems.extend(check_outputs(args, stdout_allowed=True))
    problems.extend(check_cpu_count(args))
    problems.extend(check_num_arg(
        args.min_length, number_type=int, mini=1,
        prefix='-l/--min_length'))
    if args.max_length is not None:
        problems.extend(check_num_arg(
            args.max_length, number_type=int, mini=args.min_length,
            prefix='-L/--max_length'))
    else:
        args.max_length = args.min_length
    problems.extend(check_num_arg(
        args.nb_sequences, number_type=int, mini=1,
        prefix='-n/--nb_sequences'))
    if args.seq_type is None:
        args.seq_type = SEQ_TYPE
    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 14
0
def check_arguments(args):
    """Check inputs, outputs and the numeric filtering options.

    Length and count bounds are checked as integers, the two proportion
    bounds as floats with an upper limit of 100.0; each maximum uses the
    matching minimum as its lower limit. ``args.counted_char``, when not
    ``None``, must have length 1. If any message was collected, each one
    is printed with ``print_stderr(..., prefix='ERROR')`` and the program
    exits with status 1.
    """
    def int_errors(value, **options):
        # Integer check; remaining keywords are forwarded unchanged.
        return check_num_arg(value, number_type=int, **options)

    def percent_errors(value, lower, option):
        # Float check bounded above by 100.0.
        return check_num_arg(value, number_type=float, mini=lower,
                             maxi=100.0, prefix=option)

    problems = []
    problems.extend(check_sequtils_inputs(args, stdin_allowed=True))
    problems.extend(check_sequtils_outputs(args, stdout_allowed=True))
    problems.extend(int_errors(
        args.min_length, mini=1, prefix='-l/--min_length'))
    problems.extend(int_errors(
        args.max_length, mini=args.min_length, none_allowed=True,
        prefix='-L/--max_length'))
    problems.extend(int_errors(
        args.min_count, mini=0, prefix='-n/--min_count'))
    problems.extend(int_errors(
        args.max_count, mini=args.min_count, none_allowed=True,
        prefix='-N/--max_count'))
    problems.extend(percent_errors(args.min_prop, 0.0, '-p/--min_prop'))
    problems.extend(percent_errors(
        args.max_prop, args.min_prop, '-P/--max_prop'))
    if args.counted_char is not None and len(args.counted_char) != 1:
        problems.append('-c/--counted_char: Must be one character.')

    if not problems:
        return
    for problem in problems:
        print_stderr(problem, prefix='ERROR')
    sys.exit(1)
Ejemplo n.º 15
0
def check_arguments(args):
    """Check every path and numeric option, then fill in the defaults.

    Messages returned by the ``check_*`` helpers are collected, along
    with one message when ``args.min_level`` exceeds a non-``None``
    ``args.max_level`` and one when the number of sample names differs
    from the number of sample paths. If any message was collected, each
    one is printed with ``print_stderr(..., prefix='ERROR')`` and the
    program exits with status 1.

    Otherwise defaults are applied: ``args.output_types`` becomes
    ``OUTPUT_TYPES`` when ``None``; all three of process/function/
    component are enabled when none is set; up_reg and down_reg are
    enabled (not_reg disabled) when none of the three is set.
    """
    def int_errors(value, **options):
        # Integer check; remaining keywords are forwarded unchanged.
        return check_num_arg(value, number_type=int, **options)

    def float_errors(value, **options):
        # Float check; remaining keywords are forwarded unchanged.
        return check_num_arg(value, number_type=float, **options)

    problems = []

    # Path, directory and binary arguments
    for annot_path in args.annot_paths:
        problems.extend(check_file_arg(annot_path, stdio_allowed=True))
    for sample_path in args.sample_paths:
        problems.extend(
            check_file_arg(sample_path, prefix='-s/--sample_path'))
    problems.extend(check_file_arg(args.tree_path, prefix='-t/--tree_path'))
    problems.extend(check_file_arg(args.ref_path, prefix='-r/--ref_path'))
    problems.extend(check_file_arg(
        args.info_path, none_allowed=True, prefix='-i/--info_path'))
    problems.extend(check_dir_arg(
        args.output_dir_path, mode='w', create=True,
        prefix='-o/--output_dir_path'))
    problems.extend(
        check_bin_arg(args.lualatex_path, prefix='--lualatex_path'))

    # Numeric arguments
    problems.extend(int_errors(
        args.min_level, mini=1, prefix='-l/--min_level'))
    problems.extend(int_errors(
        args.max_level, mini=1, none_allowed=True,
        prefix='-L/--max_level'))
    problems.extend(float_errors(
        args.min_fold_change, mini=1.0, prefix='-f/--min_fold_change'))
    problems.extend(float_errors(
        args.max_pvalue, mini=0.0, maxi=1.0, prefix='-p/--max_pvalue'))
    problems.extend(int_errors(
        args.min_seq_count, mini=0, prefix='-n/--min_seq_count'))
    problems.extend(float_errors(
        args.min_seq_prop, mini=0.0, maxi=100.0,
        prefix='-m/--min_seq_prop'))

    # Checks involving more than one argument
    if args.max_level is not None and args.min_level > args.max_level:
        problems.append('-l/-L: Max level must be greater than min level.')
    if (args.sample_names is not None
            and len(args.sample_names) != len(args.sample_paths)):
        problems.append('-s/-S: The number of sample names does not match '
                        'the number of sample paths.')

    if problems:
        for problem in problems:
            print_stderr(problem, prefix='ERROR')
        sys.exit(1)

    # Defaults, applied only once validation has passed
    if args.output_types is None:
        args.output_types = OUTPUT_TYPES

    if not args.process and not args.function and not args.component:
        args.process = args.function = args.component = True

    if not args.up_reg and not args.down_reg and not args.not_reg:
        args.up_reg = True
        args.down_reg = True
        args.not_reg = False