def add_barrnap_features(assemblies, features, barrnap_gff):
    """Load rRNA predictions from a barrnap GFF file into the annotation graph.

    For every 9-column row the referenced assembly (column 1) is looked up in
    ``assemblies`` and created on demand with empty residues.  Rows whose type
    (column 3) is ``rRNA`` additionally produce a ``things.Gene`` and a child
    ``things.rRNA`` located on that assembly; both are registered in
    ``features`` under freshly generated UUID-based IDs.

    Args:
        assemblies: dict of assembly ID -> assembly object; mutated in place
            when a previously unseen sequence ID is encountered.
        features: dict of feature ID -> feature object; the new gene and rRNA
            objects are added here.
        barrnap_gff: path to the GFF file to read.
    """
    # GFF strand symbol -> numeric strand; anything else is treated as 0.
    strand_codes = {'+': 1, '-': -1}

    # Context manager guarantees the handle is closed, even on error.
    with open(barrnap_gff) as gff_fh:
        for line in gff_fh:
            if line.startswith('#'):
                continue

            # Strip only the line terminator so the attribute column handed to
            # the parser below does not end in a stray newline, while any
            # trailing (empty) tab-separated columns are preserved.
            cols = line.rstrip("\r\n").split("\t")
            if len(cols) != 9:
                continue

            seq_id = cols[0]
            if seq_id in assemblies:
                current_assembly = assemblies[seq_id]
            else:
                current_assembly = things.Assembly(id=seq_id, residues='')
                assemblies[seq_id] = current_assembly

            if cols[2] != 'rRNA':
                continue

            feat_base = "rRNA_{0}".format(uuid.uuid4())
            gene_id = "{0}_gene".format(feat_base)
            rRNA_id = "{0}_rRNA".format(feat_base)

            # Start column is decremented by one, end column is used as-is
            # (presumably converting 1-based GFF coordinates to 0-based
            # interbase -- confirm against things.locate_on).
            rfmin = int(cols[3]) - 1
            rfmax = int(cols[4])
            rstrand = strand_codes.get(cols[6], 0)

            gene = things.Gene(id=gene_id)
            gene.locate_on(target=current_assembly, fmin=rfmin, fmax=rfmax, strand=rstrand)
            features[gene_id] = gene
            current_assembly.add_gene(gene)

            rRNA = things.rRNA(id=rRNA_id, parent=gene)
            rRNA.locate_on(target=current_assembly, fmin=rfmin, fmax=rfmax, strand=rstrand)
            gene.add_rRNA(rRNA)
            rRNA.annotation = gff.parse_annotation_from_column_9(cols[8])
            features[rRNA_id] = rRNA
def main():
    """Batch-update one column 9 attribute of a GFF3 file.

    Reads a two-column, tab-delimited update file (feature identifier, new
    value).  Every 9-column row of the input GFF3 whose ``--key`` attribute
    (default ``ID``) matches an identifier in the update file gets its
    ``--attribute`` set to the corresponding value.  If ``--type`` is given,
    only rows with that value in column 3 are eligible.  All lines, modified
    or not, are written to the output file.

    Raises:
        Exception: if the input and output paths are the same.
    """
    parser = argparse.ArgumentParser(
        description='Updates 9th-column key/value pairs in GFF file using a batch-update file')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file')
    parser.add_argument('-u', '--update_file', type=str, required=True,
                        help='A two-column file (FeatureID, value)')
    parser.add_argument('-a', '--attribute', type=str, required=True,
                        help='The attribute value to update')
    parser.add_argument('-o', '--output_file', type=str, required=True,
                        help='Path to an output file to be created')
    parser.add_argument('-k', '--key', type=str, required=False, default='ID',
                        help='Which key in the 9th column helps identify the row to be updated?')
    parser.add_argument('-t', '--type', type=str, required=False,
                        help='Filter rows updated based on the 3rd (type) column')
    args = parser.parse_args()

    # protect the user: opening the output for writing would truncate the input
    if args.input_file == args.output_file:
        raise Exception("ERROR: Don't set --input_file and --output_file to be the same thing. Bad things will happen. Bad Things.")

    # first read in the values to be updated
    changes = dict()
    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")

            if len(cols) != 2:
                print("WARNING: Skipping the following update line because two columns were expected:\n{0}".format(line))
                continue

            changes[cols[0]] = cols[1]

    # Context managers make sure the output is flushed and every handle is
    # closed, even if parsing fails part-way through.
    with open(args.input_file) as infh, open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            # every line is echoed, whether or not it was touched
            outfh.write("\t".join(cols) + "\n")
def main():
    """Append text to the value of one column 9 attribute in a GFF file.

    For every 9-column row (optionally restricted to a single feature type in
    column 3) that carries the ``--key`` attribute, ``--appended_text`` is
    appended to that attribute's value.  All other lines are copied to the
    output with trailing whitespace stripped.  The number of modified rows is
    reported on stdout.

    Raises:
        Exception: if the input and output paths are the same.
    """
    parser = argparse.ArgumentParser(
        description='Appends text to the value of a given column 9 attribute in a GFF file')

    parser.add_argument('-i', '--input_file', type=str, required=True,
                        help='Path to an input file to be read')
    parser.add_argument('-o', '--output_file', type=str, required=True,
                        help='Path to an output file to be created')
    parser.add_argument('-t', '--feature_type', type=str, required=False,
                        help='Only update features of a given type (gff column 3)')
    parser.add_argument('-k', '--key', type=str, required=True,
                        help='Which column 9 key/attribute to update values for?')
    parser.add_argument('-a', '--appended_text', type=str, required=True,
                        help='Text to append')
    args = parser.parse_args()

    # protect the user: opening the output for writing would truncate the input
    if args.input_file == args.output_file:
        raise Exception("ERROR: Don't set --input_file and --output_file to be the same thing. Bad things will happen. Bad Things.")

    replacement_count = 0

    # Context managers make sure the output is flushed and both handles are
    # closed, even if parsing fails part-way through.
    with open(args.input_file) as ifh, open(args.output_file, 'wt') as ofh:
        for line in ifh:
            line = line.rstrip()
            cols = line.split("\t")

            wrong_shape = len(cols) != 9
            wrong_type = args.feature_type is not None and (wrong_shape or args.feature_type != cols[2])

            # Rows that are not candidates are passed through untouched.
            if wrong_shape or wrong_type:
                ofh.write("{0}\n".format(line))
                continue

            col9 = gff.column_9_dict(cols[8])

            if args.key in col9:
                col9[args.key] = "{0}{1}".format(col9[args.key], args.appended_text)
                cols[8] = gff.build_column_9_from_dict(col9)
                replacement_count += 1

            ofh.write("\t".join(cols) + "\n")

    print("INFO: Made {0} replacements in the file".format(replacement_count))
def main():
    """Batch-update one column 9 attribute of a GFF3 file.

    Reads a two-column, tab-delimited update file (feature identifier, new
    value).  Every 9-column row of the input GFF3 whose ``--key`` attribute
    (default ``ID``) matches an identifier in the update file gets its
    ``--attribute`` set to the corresponding value.  If ``--type`` is given,
    only rows with that value in column 3 are eligible.  All lines, modified
    or not, are written to the output file.

    Raises:
        Exception: if the input and output paths are the same.
    """
    parser = argparse.ArgumentParser(
        description=
        'Updates 9th-column key/value pairs in GFF file using a batch-update file'
    )

    parser.add_argument('-i',
                        '--input_file',
                        type=str,
                        required=True,
                        help='A GFF3 file')
    parser.add_argument('-u',
                        '--update_file',
                        type=str,
                        required=True,
                        help='A two-column file (FeatureID, value)')
    parser.add_argument('-a',
                        '--attribute',
                        type=str,
                        required=True,
                        help='The attribute value to update')
    parser.add_argument('-o',
                        '--output_file',
                        type=str,
                        required=True,
                        help='Path to an output file to be created')
    parser.add_argument(
        '-k',
        '--key',
        type=str,
        required=False,
        default='ID',
        help='Which key in the 9th column helps identify the row to be updated?'
    )
    parser.add_argument(
        '-t',
        '--type',
        type=str,
        required=False,
        help='Filter rows updated based on the 3rd (type) column')
    args = parser.parse_args()

    # protect the user: opening the output for writing would truncate the input
    if args.input_file == args.output_file:
        raise Exception(
            "ERROR: Don't set --input_file and --output_file to be the same thing. Bad things will happen. Bad Things."
        )

    # first read in the values to be updated
    changes = dict()
    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")

            if len(cols) != 2:
                print(
                    "WARNING: Skipping the following update line because two columns were expected:\n{0}"
                    .format(line))
                continue

            changes[cols[0]] = cols[1]

    # Context managers make sure the output is flushed and every handle is
    # closed, even if parsing fails part-way through.
    with open(args.input_file) as infh, open(args.output_file,
                                             'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            eligible = len(cols) == 9 and (args.type is None
                                           or args.type == cols[2])
            if eligible:
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            # every line is echoed, whether or not it was touched
            outfh.write("\t".join(cols) + "\n")
def main():
    """Renumber feature IDs in a GFF3 file with per-chromosome counters.

    For each 9-column row (optionally restricted by ``--type`` to one value of
    column 3) the ``Name`` attribute is dropped and IDs are rewritten:

    * ``gene``       -> ``<seqid>_<6-digit counter>``; the counter starts at
      10 on each newly seen sequence ID and grows by 10 per gene.
    * ``mRNA``/``tRNA`` -> ``<gene id>.1`` with ``Parent`` set to the gene
      (no isoforms are assumed).
    * ``CDS``/``exon``  -> ``<mRNA id>.<n>`` (``CDS:`` prefix for CDS rows)
      with ``Parent`` set to the mRNA.

    The old and new gene IDs are printed to stdout.  Every input line is
    written to the output, modified or not.

    The input is assumed to list a gene before its mRNA/tRNA, and that
    transcript before its CDS/exon rows; otherwise the most recently seen
    parent ID is reused, or a NameError is raised if none was seen yet.

    Raises:
        Exception: if the input and output paths are the same.
    """
    parser = argparse.ArgumentParser(
        description='Renumbers gene, mRNA/tRNA, CDS and exon IDs in a GFF3 file using per-chromosome counters')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file')
    parser.add_argument('-o', '--output_file', type=str, required=True,
                        help='Path to an output file to be created')
    parser.add_argument('-t', '--type', type=str, required=False,
                        help='Filter rows updated based on the 3rd (type) column')
    args = parser.parse_args()

    # protect the user: opening the output for writing would truncate the input
    if args.input_file == args.output_file:
        raise Exception("ERROR: Don't set --input_file and --output_file to be the same thing. Bad things will happen. Bad Things.")

    seen_chromosomes = set()
    # Doubles as the per-mRNA running sub-feature number (keyed by mRNA ID)
    # and as a "seen once" marker (keyed by the numbered CDS/exon ID).
    cdsexon_count = {}

    # Context managers make sure the output is flushed and both handles are
    # closed, even if parsing fails part-way through.
    with open(args.input_file) as infh, open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                # init. the counter for a new chromosome
                chrom = cols[0]
                if chrom not in seen_chromosomes:
                    seen_chromosomes.add(chrom)
                    incr = 0

                # first feature must be "gene"; the first gene gets 000010
                feature = cols[2]
                if feature == 'gene':
                    incr += 10

                # split col 9
                atts = gff.column_9_dict(cols[8])
                atts.pop('Name', None)

                # change values
                if feature == 'gene':
                    old_id = atts['ID']
                    gene_id = chrom + '_' + format(incr, '06d')
                    atts['ID'] = gene_id
                    print(old_id, gene_id)
                # assume no isoforms
                elif feature in ('mRNA', 'tRNA'):
                    mRNA_id = gene_id + '.1'
                    atts['ID'] = mRNA_id
                    atts['Parent'] = gene_id
                    cdsexon_count[mRNA_id] = 1
                elif feature in ('CDS', 'exon'):
                    cdsexon_id = mRNA_id + '.' + str(cdsexon_count[mRNA_id])
                    atts['Parent'] = mRNA_id
                    atts['ID'] = cdsexon_id
                    if feature == 'CDS':
                        atts['ID'] = 'CDS:' + cdsexon_id

                    # allow exon / cds switch position: the first row seen
                    # with this number only marks it; the second row with the
                    # same number advances the counter.
                    if cdsexon_id not in cdsexon_count:
                        cdsexon_count[cdsexon_id] = None
                    else:
                        cdsexon_count[mRNA_id] += 1

                cols[8] = gff.build_column_9_from_dict(atts)

            outfh.write("\t".join(cols) + "\n")