def run(start_from_here=False):
    """Fetch annotations for the reference ids and publish them with proteomes.

    Annotations are fetched into the ``new_annotations`` staging directory,
    proteomes are built from them, and every non-hidden file from both
    staging directories is copied into ``config.annotations_dir`` and
    ``config.proteomes_dir`` respectively.

    ``start_from_here`` is accepted but not used in this body.

    Returns:
        4 if the Entrez connection test fails, the non-zero code of the
        first failing step (fetching or proteome building), otherwise 0.
    """
    if not test_entrez_conn():
        log.error('No internet connection: cannot fetch annotations.')
        return 4

    staging_annotations = 'new_annotations'

    log.debug(' Using ref ids: ' + str(p.ids_list))
    ids = read_list(p.ids_list)

    status = fetch_annotations_for_ids(staging_annotations, ids)
    if status != 0:
        return status

    # NOTE(review): new_proteomes_dir is not assigned anywhere in this
    # function; presumably it comes from an enclosing or module scope.
    # Confirm it is defined, otherwise this line raises NameError.
    status = make_proteomes(staging_annotations, new_proteomes_dir)
    if status != 0:
        return status

    # Publish the staged annotations, skipping hidden (dot) files.
    visible_annotations = [name for name in listdir(staging_annotations)
                           if not name.startswith('.')]
    for name in visible_annotations:
        copy(join(staging_annotations, name), config.annotations_dir)

    # Same for the staged proteomes.
    visible_proteomes = [name for name in listdir(new_proteomes_dir)
                         if not name.startswith('.')]
    for name in visible_proteomes:
        copy(join(new_proteomes_dir, name), config.proteomes_dir)

    return 0
def run(starting_from_here=False):
    """Prepare annotations and proteomes from whichever input was supplied.

    Inputs are tried in this order:

    1. ``p.species_list`` - fetch annotations by species name, then build
       proteomes.
    2. ``p.ids_list`` - fetch annotations by reference id, then build
       proteomes.
    3. Local files from ``p.proteomes`` and/or ``p.annotations``. Genbank
       files take precedence: they are copied to ``config.annotations_dir``
       and proteomes are built from them. Otherwise the fasta files are
       adjusted into ``config.proteomes_dir``, optionally after trying to
       download matching annotations.

    ``starting_from_here`` is accepted but not used in this body.

    Returns:
        4 if a required Entrez connection test fails, a non-zero code from
        a failed fetch, the result of ``make_proteomes`` or
        ``adjust_proteomes``, or ``None`` when no local files were collected.
    """
    # --- Mode 1: species names -------------------------------------------
    if p.species_list:
        if not test_entrez_conn():
            log.error(' No internet connection: cannot fetch annotations.')
            return 4

        log.debug(' Using species list: ' + str(p.species_list))
        species = read_list(p.species_list)
        log.debug('species_list: ' + str(species))

        status = fetch_annotations_species_name_entrez(
            config.annotations_dir, species, p.proxy)
        if status != 0:
            return status
        return make_proteomes(config.annotations_dir, config.proteomes_dir)

    # --- Mode 2: reference ids -------------------------------------------
    if p.ids_list:
        if not test_entrez_conn():
            log.error('No internet connection: cannot fetch annotations.')
            return 4

        log.debug(' Using ref ids: ' + str(p.ids_list))
        ids = read_list(p.ids_list)

        # NOTE(review): unlike the other fetch calls in this function,
        # p.proxy is not forwarded here - confirm whether that is intended.
        status = fetch_annotations_for_ids(config.annotations_dir, ids)
        if status != 0:
            return status
        return make_proteomes(config.annotations_dir, config.proteomes_dir)

    # --- Mode 3: local files ---------------------------------------------
    fasta_files, gb_files = [], []

    if p.proteomes:
        fasta_files, gb_files = collect_proteomes_and_annotaitons(p.proteomes)
        if fasta_files == []:
            interrupt('No fasta found in ' + p.proteomes)

    # When both options are given, this second scan replaces the results of
    # the first one.
    if p.annotations:
        fasta_files, gb_files = collect_proteomes_and_annotaitons(p.annotations)
        if gb_files == []:
            interrupt('No gb files found in ' + p.annotations)

    # A stricter check used to live here (interrupt when neither fasta nor
    # genbank files are found, warn when both are present); it is disabled.

    if gb_files:
        if not isdir(config.annotations_dir):
            mkdir(config.annotations_dir)
        for gb_file in gb_files:
            copy(gb_file, config.annotations_dir)
        return make_proteomes(config.annotations_dir, config.proteomes_dir)

    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source; everything below is assumed to belong to the "proteomes only"
    # branch, so nothing is done (implicit None) when no files were found.
    if not fasta_files:
        return None

    if not isdir(config.proteomes_dir):
        mkdir(config.proteomes_dir)

    if p.download_anno:
        if test_entrez_conn():
            # Proteome file names (without extension) are used as the ids
            # to fetch annotations for.
            ids = [splitext(basename(fasta))[0] for fasta in fasta_files]
            log.debug('ids_list: ' + str(ids))

            status = fetch_annotations_for_ids(
                config.annotations_dir, ids, p.proxy)
            if status > 0:
                return status
            if status == -1:
                # -1 is not fatal: annotation download is switched off and
                # the proteomes are still adjusted below.
                p.download_anno = False
        else:
            # Being offline is not fatal here (an earlier variant returned
            # 1 instead); only the message is logged.
            log.error(' Warning: no internet connection, cannot fetch annotations. '
                      'A reduced version of orthogroups.txt with no annotations will be produced.')

    return adjust_proteomes(fasta_files, config.proteomes_dir, p.prot_id_field)
def run(starting_from_here=False):
    """Obtain annotations and proteomes from species names, ids or local files.

    Dispatches on the options in ``p``: ``p.species_list`` is checked first,
    then ``p.ids_list``; otherwise local directories given by ``p.proteomes``
    and ``p.annotations`` are used.

    ``starting_from_here`` is accepted but not used in this body.

    Returns:
        The status of the selected mode: 4 when a required Entrez connection
        test fails, a non-zero fetch status, the value returned by
        ``make_proteomes`` / ``adjust_proteomes``, or ``None`` if the local
        mode collected neither annotations nor proteomes.
    """

    def build_proteomes():
        # Shared final step of every annotation-based mode.
        return make_proteomes(config.annotations_dir, config.proteomes_dir)

    def from_species_list():
        # Fetch annotations by species name, then build proteomes.
        if not test_entrez_conn():
            log.error(
                ' No internet connection: cannot fetch annotations.')
            return 4
        log.debug(' Using species list: ' + str(p.species_list))
        names = read_list(p.species_list)
        log.debug('species_list: ' + str(names))
        code = fetch_annotations_species_name_entrez(
            config.annotations_dir, names, p.proxy)
        return code if code != 0 else build_proteomes()

    def from_ids_list():
        # Fetch annotations by reference id, then build proteomes.
        if not test_entrez_conn():
            log.error('No internet connection: cannot fetch annotations.')
            return 4
        log.debug(' Using ref ids: ' + str(p.ids_list))
        wanted_ids = read_list(p.ids_list)
        # NOTE(review): p.proxy is passed to the other fetch calls but not
        # to this one - verify that this is deliberate.
        code = fetch_annotations_for_ids(config.annotations_dir, wanted_ids)
        return code if code != 0 else build_proteomes()

    def from_local_files():
        # Use user-supplied genbank and/or fasta files.
        found_fasta, found_gb = [], []

        if p.proteomes:
            found_fasta, found_gb = collect_proteomes_and_annotaitons(
                p.proteomes)
            if found_fasta == []:
                interrupt('No fasta found in ' + p.proteomes)

        # A second scan overwrites the lists produced by the first one.
        if p.annotations:
            found_fasta, found_gb = collect_proteomes_and_annotaitons(
                p.annotations)
            if found_gb == []:
                interrupt('No gb files found in ' + p.annotations)

        # Disabled here: interrupting when neither kind of file is present
        # and warning when both kinds are present.

        if found_gb:
            # Genbank files win: copy them over and derive the proteomes.
            if not isdir(config.annotations_dir):
                mkdir(config.annotations_dir)
            for gb_path in found_gb:
                copy(gb_path, config.annotations_dir)
            return build_proteomes()

        # NOTE(review): the original indentation was lost; the remainder is
        # assumed to run only when fasta files were found, leaving an
        # implicit None result otherwise.
        if found_fasta:
            if not isdir(config.proteomes_dir):
                mkdir(config.proteomes_dir)

            if p.download_anno:
                if not test_entrez_conn():
                    # Not fatal (a previous variant returned 1 here): the
                    # run continues without annotations.
                    log.error(
                        ' Warning: no internet connection, cannot fetch annotations. '
                        'A reduced version of orthogroups.txt with no annotations will be produced.'
                    )
                else:
                    # Ids are the proteome file names without extension.
                    file_ids = []
                    for fasta_path in found_fasta:
                        file_ids.append(splitext(basename(fasta_path))[0])
                    log.debug('ids_list: ' + str(file_ids))

                    code = fetch_annotations_for_ids(
                        config.annotations_dir, file_ids, p.proxy)
                    if code > 0:
                        return code
                    if code == -1:
                        # Non-fatal status: turn annotation download off
                        # and carry on with the proteomes.
                        p.download_anno = False

            return adjust_proteomes(found_fasta, config.proteomes_dir,
                                    p.prot_id_field)

        return None

    if p.species_list:
        return from_species_list()
    if p.ids_list:
        return from_ids_list()
    return from_local_files()