def maf2Dict(multipleAlignment, id=None):
    """Store multiple-alignment information in a dictionary.

    The first record of the block is used as the reference sequence: its
    ``id`` must have the form ``<species>.<chromosome>`` and its
    ``annotations`` must provide ``start`` and ``size``.

    Args:
        multipleAlignment: one alignment block. It must support indexing,
            iteration over records that carry an ``id``, and
            ``format("maf")`` (presumably a Biopython alignment object, as
            yielded by ``Bio.AlignIO.parse`` in maf_dict_generator).
        id: not used by this function; maf_dict_generator passes the block
            index here, and the parameter is kept so such calls keep working.

    Returns:
        dict with the keys ``key`` (``"<chromosome>.<start>"``),
        ``genome_start``, ``genome_stop`` (start + size), ``chromosome``,
        ``speciesAligned`` (sorted unique species prefixes of all record
        ids), ``nSpeciesAligned`` and ``maf`` (the MAF text of the block
        passed through ``encoding.dumps``).

    Raises:
        IndexError: if the block is empty or the reference id has no ".".
        KeyError: if the reference record lacks ``start`` or ``size``.
    """
    reference = multipleAlignment[0]

    # Split on the first dot only, so chromosome/scaffold names that contain
    # dots themselves (e.g. "GL456210.1") are kept whole instead of being
    # truncated to their first component.
    chrom = reference.id.split(".", 1)[1]
    genome_start = reference.annotations['start']
    genome_stop = genome_start + reference.annotations['size']

    # Sorted so the species list does not depend on set iteration order.
    species = sorted(set(record.id.split(".", 1)[0]
                         for record in multipleAlignment))

    # Drop the "# generated by Biopython" header line and the last two
    # characters of the formatted block to save some space.
    maf_text = multipleAlignment.format("maf").replace(
        "# generated by Biopython\n", "")[:-2]

    data = dict()
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = species
    data['nSpeciesAligned'] = len(species)
    data['maf'] = encoding.dumps(maf_text)
    return data
def maf2Dict(multipleAlignment, id=None):
    """Store multiple-alignment information in a dictionary.

    The first record of the block is used as the reference sequence: its
    ``id`` must have the form ``<species>.<chromosome>`` and its
    ``annotations`` must provide ``start`` and ``size``.

    Args:
        multipleAlignment: one alignment block. It must support indexing,
            iteration over records that carry an ``id``, and
            ``format("maf")`` (presumably a Biopython alignment object, as
            yielded by ``Bio.AlignIO.parse`` in maf_dict_generator).
        id: not used by this function; maf_dict_generator passes the block
            index here, and the parameter is kept so such calls keep working.

    Returns:
        dict with the keys ``key`` (``"<chromosome>.<start>"``),
        ``genome_start``, ``genome_stop`` (start + size), ``chromosome``,
        ``speciesAligned`` (sorted unique species prefixes of all record
        ids), ``nSpeciesAligned`` and ``maf`` (the MAF text of the block
        passed through ``encoding.dumps``).

    Raises:
        IndexError: if the block is empty or the reference id has no ".".
        KeyError: if the reference record lacks ``start`` or ``size``.
    """
    reference = multipleAlignment[0]

    # Split on the first dot only, so chromosome/scaffold names that contain
    # dots themselves (e.g. "GL456210.1") are kept whole instead of being
    # truncated to their first component.
    chrom = reference.id.split(".", 1)[1]
    genome_start = reference.annotations['start']
    genome_stop = genome_start + reference.annotations['size']

    # Sorted so the species list does not depend on set iteration order.
    species = sorted(set(record.id.split(".", 1)[0]
                         for record in multipleAlignment))

    # Drop the "# generated by Biopython" header line and the last two
    # characters of the formatted block to save some space.
    maf_text = multipleAlignment.format("maf").replace(
        "# generated by Biopython\n", "")[:-2]

    data = dict()
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = species
    data['nSpeciesAligned'] = len(species)
    data['maf'] = encoding.dumps(maf_text)
    return data
def maf2Dict(multipleAlignment, id=None):
    """Store multiple-alignment information in a dictionary.

    The first record of the block is used as the reference sequence: its
    ``id`` must have the form ``<species>.<chromosome>`` and its
    ``annotations`` must provide ``start`` and ``size``.

    Args:
        multipleAlignment: one alignment block. It must support indexing,
            iteration over records that carry an ``id``, and
            ``format("maf")`` (presumably a Biopython alignment object, as
            yielded by ``Bio.AlignIO.parse`` in maf_dict_generator).
        id: not used by this function; maf_dict_generator passes the block
            index here, and the parameter is kept so such calls keep working.

    Returns:
        dict with the keys ``key`` (``"<chromosome>.<start>"``),
        ``genome_start``, ``genome_stop`` (start + size), ``chromosome``,
        ``speciesAligned`` (sorted unique species prefixes of all record
        ids), ``nSpeciesAligned`` and ``maf`` (the MAF text of the block
        passed through ``encoding.dumps``).

    Raises:
        IndexError: if the block is empty or the reference id has no ".".
        KeyError: if the reference record lacks ``start`` or ``size``.
    """
    reference = multipleAlignment[0]

    # Split on the first dot only, so chromosome/scaffold names that contain
    # dots themselves (e.g. "GL456210.1") are kept whole instead of being
    # truncated to their first component.
    chrom = reference.id.split(".", 1)[1]
    genome_start = reference.annotations['start']
    genome_stop = genome_start + reference.annotations['size']

    # Sorted so the species list does not depend on set iteration order.
    species = sorted(set(record.id.split(".", 1)[0]
                         for record in multipleAlignment))

    # Drop the "# generated by Biopython" header line and the last two
    # characters of the formatted block to save some space.
    maf_text = multipleAlignment.format("maf").replace(
        "# generated by Biopython\n", "")[:-2]

    data = dict()
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = species
    data['nSpeciesAligned'] = len(species)
    data['maf'] = encoding.dumps(maf_text)
    return data


def maf_dict_generator(mafFile):
    """Yield one maf2Dict() dictionary per alignment block of a MAF file.

    Args:
        mafFile: path of the MAF file to read.

    Yields:
        dict: the maf2Dict() result for each block, in file order. The
        zero-based block index is passed as maf2Dict's ``id`` argument.
    """
    # The file stays open while blocks are being consumed and is closed when
    # the generator finishes or is discarded.
    with open(mafFile, 'r') as mafFileHandle:
        alignments = Bio.AlignIO.parse(mafFileHandle, "maf")
        for blockN, multipleAlignment in enumerate(alignments):
            yield maf2Dict(multipleAlignment, blockN)


if __name__ == "__main__":
    # Every command-line argument is treated as a MAF file; one encoded
    # dictionary is printed per alignment block.
    mafFiles = sys.argv[1:]
    for mafFile in mafFiles:
        for converted in maf_dict_generator(mafFile):
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(encoding.dumps(converted))
def maf2Dict(multipleAlignment, id=None):
    """Store multiple-alignment information in a dictionary.

    The first record of the block is used as the reference sequence: its
    ``id`` must have the form ``<species>.<chromosome>`` and its
    ``annotations`` must provide ``start`` and ``size``.

    Args:
        multipleAlignment: one alignment block. It must support indexing,
            iteration over records that carry an ``id``, and
            ``format("maf")`` (presumably a Biopython alignment object, as
            yielded by ``Bio.AlignIO.parse`` in maf_dict_generator).
        id: not used by this function; maf_dict_generator passes the block
            index here, and the parameter is kept so such calls keep working.

    Returns:
        dict with the keys ``key`` (``"<chromosome>.<start>"``),
        ``genome_start``, ``genome_stop`` (start + size), ``chromosome``,
        ``speciesAligned`` (sorted unique species prefixes of all record
        ids), ``nSpeciesAligned`` and ``maf`` (the MAF text of the block
        passed through ``encoding.dumps``).

    Raises:
        IndexError: if the block is empty or the reference id has no ".".
        KeyError: if the reference record lacks ``start`` or ``size``.
    """
    reference = multipleAlignment[0]

    # Split on the first dot only, so chromosome/scaffold names that contain
    # dots themselves (e.g. "GL456210.1") are kept whole instead of being
    # truncated to their first component.
    chrom = reference.id.split(".", 1)[1]
    genome_start = reference.annotations['start']
    genome_stop = genome_start + reference.annotations['size']

    # Sorted so the species list does not depend on set iteration order.
    species = sorted(set(record.id.split(".", 1)[0]
                         for record in multipleAlignment))

    # Drop the "# generated by Biopython" header line and the last two
    # characters of the formatted block to save some space.
    maf_text = multipleAlignment.format("maf").replace(
        "# generated by Biopython\n", "")[:-2]

    data = dict()
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = species
    data['nSpeciesAligned'] = len(species)
    data['maf'] = encoding.dumps(maf_text)
    return data


def maf_dict_generator(mafFile):
    """Yield one maf2Dict() dictionary per alignment block of a MAF file.

    Args:
        mafFile: path of the MAF file to read.

    Yields:
        dict: the maf2Dict() result for each block, in file order. The
        zero-based block index is passed as maf2Dict's ``id`` argument.
    """
    # The file stays open while blocks are being consumed and is closed when
    # the generator finishes or is discarded.
    with open(mafFile, 'r') as mafFileHandle:
        alignments = Bio.AlignIO.parse(mafFileHandle, "maf")
        for blockN, multipleAlignment in enumerate(alignments):
            yield maf2Dict(multipleAlignment, blockN)


if __name__ == "__main__":
    # Every command-line argument is treated as a MAF file; one encoded
    # dictionary is printed per alignment block.
    mafFiles = sys.argv[1:]
    for mafFile in mafFiles:
        for converted in maf_dict_generator(mafFile):
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print(encoding.dumps(converted))