def convert_file(input, output):
    """Convert a CSV of protein/UniProt pairs into a TSV of protein/gene IDs.

    Each non-blank line of ``input`` is split on commas; the first field is
    taken as the protein name and the second as the UniProt accession.  All
    accessions are handed in one call to ``gene_ids_from_uniprot_accessions``
    and the results are paired back up with the protein names by position.

    The output starts with the header line ``#PROTEIN<TAB>GENE ID`` followed
    by one ``protein<TAB>id1;id2;...`` line per protein.  Proteins whose
    lookup result is falsy (e.g. empty or ``None``) are omitted.

    Args:
        input: Path of the comma-separated input file.  (The name shadows the
            builtin, but is kept so keyword callers continue to work.)
        output: Path of the tab-separated file to write; it is overwritten.

    Raises:
        IndexError: If a non-blank input line has fewer than two fields.
    """
    proteins = []
    uniprots = []
    with open(input) as f:
        for line in f.read().splitlines():
            # A blank (typically trailing) line has no second field and
            # would otherwise abort the whole conversion with IndexError.
            if not line.strip():
                continue
            values = line.split(',')
            proteins.append(values[0])
            uniprots.append(values[1])

    # NOTE(review): presumably returns one iterable of gene-ID strings (or a
    # falsy value) per accession, in input order -- confirm against the helper.
    gene_ids = gene_ids_from_uniprot_accessions(uniprots)

    # Text mode, not 'wb': the values written below are str, and writing str
    # to a binary-mode file raises TypeError on Python 3.
    with open(output, 'w') as out:
        out.write('#PROTEIN\tGENE ID\n')
        for protein, gene_id in zip(proteins, gene_ids):
            if gene_id:
                out.write(protein + '\t' + ';'.join(gene_id) + '\n')
# Converts the original flu genes file (original_flu_genes.csv) from Uniprot
# Accession IDs to Gene IDs, and only retains human genes.
#
# Usage: python scripts/01_filter_flu_genes.py > data/flu_gene_ids.tsv

from convert_uniprot import gene_ids_from_uniprot_accessions

# Single-argument print(...) calls produce identical output on Python 2 and
# Python 3, whereas the bare `print '...'` statement is a SyntaxError on 3.
print('# Flu Proteins/Genes')
print('# PROTEIN GENE')

with open('data/original_flu_genes.csv') as f:
    proteins = []
    uniprots = []
    for line in f:
        # Skip blank (e.g. trailing) lines, which have no fifth column and
        # would otherwise raise IndexError below.
        if not line.strip():
            continue
        values = line.split(',')
        # Column 5 is used as the entry name; only rows whose entry name
        # ends in '_HUMAN' are retained.
        entry_name = values[4]
        if entry_name.strip().endswith('_HUMAN'):
            proteins.append(values[0])
            uniprots.append(values[1])

# NOTE(review): presumably returns one iterable of gene-ID strings (or a falsy
# value) per accession, in input order -- confirm against convert_uniprot.
gene_ids = gene_ids_from_uniprot_accessions(uniprots)

# Emit one "protein<TAB>id1;id2;..." line per protein that has a truthy result.
for protein, gene_id in zip(proteins, gene_ids):
    if gene_id:
        print(protein + '\t' + ';'.join(gene_id))
# Converts the original flu genes file (original_flu_genes.csv) from Uniprot
# Accession IDs to Gene IDs, and only retains human genes.
#
# Usage: python scripts/01_filter_flu_genes.py > data/flu_gene_ids.tsv

from convert_uniprot import gene_ids_from_uniprot_accessions

# Use the call form of print with a single string argument: it behaves the
# same under Python 2 and Python 3, while the statement form only parses on 2.
print('# Flu Proteins/Genes')
print('# PROTEIN GENE')

with open('data/original_flu_genes.csv') as f:
    proteins = []
    uniprots = []
    for line in f:
        # A blank line splits into a single field, so indexing column 5
        # would raise IndexError; ignore such lines instead.
        if not line.strip():
            continue
        values = line.split(',')
        # Keep only rows whose fifth column (the entry name) ends in '_HUMAN'.
        entry_name = values[4]
        if entry_name.strip().endswith('_HUMAN'):
            proteins.append(values[0])
            uniprots.append(values[1])

# NOTE(review): the helper is assumed to return results aligned by position
# with `uniprots`, each an iterable of strings or a falsy value -- confirm.
gene_ids = gene_ids_from_uniprot_accessions(uniprots)

# Proteins with a falsy lookup result are left out of the output.
for protein, gene_id in zip(proteins, gene_ids):
    if gene_id:
        print(protein + '\t' + ';'.join(gene_id))