コード例 #1
0
def main():
    """Batch-update one column-9 attribute of a GFF3 file.

    Command-line options (read from ``sys.argv``):
      -i/--input_file   GFF3 file to read.
      -u/--update_file  Two-column, tab-delimited file: lookup value, new value.
      -a/--attribute    Column-9 attribute that receives the new value.
      -o/--output_file  Path of the file to write.
      -k/--key          Column-9 key whose value is looked up in the update
                        file (default: ID).
      -t/--type         If given, only rows whose 3rd column equals this value
                        are candidates for updating.

    Every input line is written to the output with trailing whitespace
    stripped.  Nine-column rows that pass the type filter, and whose --key
    value appears in the update file, get --attribute set to the mapped value.
    Update-file lines that do not have exactly two columns are skipped with a
    warning on stdout.

    Raises:
        Exception: if --input_file and --output_file are the same path.
        OSError: if the update or input file cannot be opened; in that case
            the output file is not created.
    """
    parser = argparse.ArgumentParser( description='Updates 9th-column key/value pairs in GFF file using a batch-update file')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file' )
    parser.add_argument('-u', '--update_file', type=str, required=True, help='A two-column file (FeatureID, value)' )
    parser.add_argument('-a', '--attribute', type=str, required=True, help='The attribute value to update' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-k', '--key', type=str, required=False, default='ID', help='Which key in the 9th column helps identify the row to be updated?' )
    parser.add_argument('-t', '--type', type=str, required=False, help='Filter rows updated based on the 3rd (type) column' )
    args = parser.parse_args()

    # protect the user
    if args.input_file == args.output_file:
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    # Read the requested changes before the output path is touched, so an
    # unreadable update file does not leave an empty output file behind.
    changes = dict()

    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")
            if len(cols) != 2:
                print("WARNING: Skipping the following update line because two columns were expected:\n{0}".format(line))
                continue

            # a later line for the same lookup value overrides an earlier one
            changes[cols[0]] = cols[1]

    # The input is opened first: if it is missing, the output is never created.
    with open(args.input_file) as in_fh, open(args.output_file, 'wt') as out_fh:
        for line in in_fh:
            cols = line.rstrip().split("\t")

            # only 9-column rows of the requested type (if any) can be updated
            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            # comments, directives and unmatched rows pass through unchanged
            out_fh.write("\t".join(cols) + "\n")
コード例 #2
0
def main():
    """Append text to the value of one column-9 attribute in a GFF file.

    Command-line options (read from ``sys.argv``):
      -i/--input_file     File to read.
      -o/--output_file    Path of the file to write.
      -t/--feature_type   If given, only rows whose 3rd column equals this
                          value are modified.
      -k/--key            Column-9 attribute whose value is extended.
      -a/--appended_text  Text appended to that attribute's value.

    Every input line is written to the output with trailing whitespace
    stripped.  Nine-column rows that pass the type filter and carry --key get
    --appended_text added to the end of that attribute's value.  The number of
    modified rows is reported on stdout when the run finishes.

    Raises:
        Exception: if --input_file and --output_file are the same path.
        OSError: if the input file cannot be opened; in that case the output
            file is not created.
    """
    parser = argparse.ArgumentParser( description='Appends text to the values of a given column-9 attribute in a GFF file')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='Path to an input file to be read' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-t', '--feature_type', type=str, required=False, help='Only update features of a given type (gff column 3)' )
    parser.add_argument('-k', '--key', type=str, required=True, help='Which column 9 key/attribute to update values for?' )
    parser.add_argument('-a', '--appended_text', type=str, required=True, help='Text to append' )
    args = parser.parse_args()

    # protect the user
    if args.input_file == args.output_file:
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    replacement_count = 0

    # The input is opened first: if it is missing, the output is never created.
    # Both handles are closed (and the output flushed) when the block exits.
    with open(args.input_file) as ifh, open(args.output_file, 'wt') as ofh:
        for line in ifh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.feature_type is None or args.feature_type == cols[2]):
                col9 = gff.column_9_dict(cols[8])
                if args.key in col9:
                    col9[args.key] = "{0}{1}".format(col9[args.key], args.appended_text)
                    cols[8] = gff.build_column_9_from_dict(col9)
                    replacement_count += 1

            # Re-joining the split columns reproduces untouched lines exactly,
            # so modified and pass-through rows share one write.
            ofh.write("\t".join(cols) + "\n")

    print("INFO: Made {0} replacements in the file".format(replacement_count))
コード例 #3
0
def main():
    """Batch-update one column-9 attribute of a GFF3 file.

    Options are read from ``sys.argv``: an input GFF3 file, a two-column
    tab-delimited update file (lookup value, new value), the attribute to set,
    the output path, the column-9 key used for the lookup (default ``ID``) and
    an optional filter on the 3rd (type) column.

    Every input line is copied to the output with trailing whitespace
    stripped.  Nine-column rows that pass the type filter, and whose --key
    value is listed in the update file, get --attribute set to the mapped
    value.  Update-file lines without exactly two columns are skipped with a
    warning on stdout.

    Raises:
        Exception: if --input_file and --output_file are the same path.
        OSError: if the update or input file cannot be opened; in that case
            the output file is not created.
    """
    parser = argparse.ArgumentParser(
        description=
        'Updates 9th-column key/value pairs in GFF file using a batch-update file'
    )

    parser.add_argument('-i',
                        '--input_file',
                        type=str,
                        required=True,
                        help='A GFF3 file')
    parser.add_argument('-u',
                        '--update_file',
                        type=str,
                        required=True,
                        help='A two-column file (FeatureID, value)')
    parser.add_argument('-a',
                        '--attribute',
                        type=str,
                        required=True,
                        help='The attribute value to update')
    parser.add_argument('-o',
                        '--output_file',
                        type=str,
                        required=True,
                        help='Path to an output file to be created')
    parser.add_argument(
        '-k',
        '--key',
        type=str,
        required=False,
        default='ID',
        help='Which key in the 9th column helps identify the row to be updated?'
    )
    parser.add_argument(
        '-t',
        '--type',
        type=str,
        required=False,
        help='Filter rows updated based on the 3rd (type) column')
    args = parser.parse_args()

    # protect the user
    if args.input_file == args.output_file:
        raise Exception(
            "ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things."
        )

    # Load the lookup table before the output path is touched, so a bad
    # update file cannot leave an empty output file behind.
    changes = dict()

    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")
            if len(cols) != 2:
                print(
                    "WARNING: Skipping the following update line because two columns were expected:\n{0}"
                    .format(line))
                continue

            # a later line for the same lookup value overrides an earlier one
            changes[cols[0]] = cols[1]

    # Input is opened before output; both are closed when the block exits.
    with open(args.input_file) as in_fh, \
            open(args.output_file, 'wt') as out_fh:
        for line in in_fh:
            cols = line.rstrip().split("\t")

            # only 9-column rows of the requested type (if any) can be updated
            if len(cols) == 9 and (args.type is None
                                   or args.type == cols[2]):
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            # everything else passes through unchanged
            out_fh.write("\t".join(cols) + "\n")
コード例 #4
0
ファイル: rename_id_gff3.py プロジェクト: AllanSSX/Scratch
def main():
    """Rewrite feature IDs in a GFF3 file using a positional naming scheme.

    Command-line options (read from ``sys.argv``):
      -i/--input_file   GFF3 file to read.
      -o/--output_file  Path of the file to write.
      -t/--type         If given, only rows whose 3rd column equals this value
                        are rewritten.

    For every 9-column row that passes the type filter, the ``Name`` attribute
    is dropped and, depending on the feature type:

      * gene       -> ID becomes ``<seqid>_<counter>``, where the counter is
                      kept per sequence, advances by 10 per gene and is
                      zero-padded to six digits.  The old and new IDs are
                      printed to stdout.
      * mRNA/tRNA  -> ID becomes ``<gene id>.1`` and Parent the current gene
                      (a single transcript per gene is assumed).
      * CDS/exon   -> ID becomes ``<transcript id>.<n>`` (prefixed with
                      ``CDS:`` for CDS rows) and Parent the current transcript.

    All other lines are copied through with trailing whitespace stripped.

    NOTE(review): the exon/CDS ordinal only advances when the same ID is seen
    a second time, i.e. the scheme relies on each exon being paired with one
    CDS.  Two consecutive exons (or two consecutive CDS rows) under one
    transcript receive the same ID.  Confirm whether that is acceptable for
    the files this script is run on.

    Raises:
        Exception: if --input_file and --output_file are the same path, or if
            a transcript / CDS / exon row is met before its parent row.
        OSError: if the input file cannot be opened; in that case the output
            file is not created.
    """
    parser = argparse.ArgumentParser( description='Renames gene, transcript, CDS and exon IDs in a GFF3 file using per-sequence counters')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-t', '--type', type=str, required=False, help='Filter rows updated based on the 3rd (type) column' )

    args = parser.parse_args()

    # protect the user: opening the output for writing would truncate the input
    if args.input_file == args.output_file:
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    gene_counters = {}      # seqid -> counter value of the last gene on it
    part_ordinal = {}       # transcript id -> ordinal for its next exon/CDS
    seen_part_ids = set()   # exon/CDS ids that have been handed out once
    gene_id = None          # id of the most recent gene row
    mRNA_id = None          # id of the most recent mRNA/tRNA row

    # The input is opened first: if it is missing, the output is never created.
    with open(args.input_file) as infh, open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                seqid = cols[0]
                feature = cols[2]

                atts = gff.column_9_dict(cols[8])
                atts.pop('Name', None)

                if feature == 'gene':
                    # A counter per sequence keeps the numbering correct even
                    # when rows of different sequences are interleaved.
                    gene_counters[seqid] = gene_counters.get(seqid, 0) + 10

                    old_id = atts['ID']
                    gene_id = seqid + '_' + format(gene_counters[seqid], '06d')
                    atts['ID'] = gene_id

                    print(old_id, gene_id)

                # assume no isoforms
                elif feature in ('mRNA', 'tRNA'):
                    if gene_id is None:
                        raise Exception("ERROR:  Found a {0} row before any gene row; cannot assign it a parent.".format(feature))

                    mRNA_id = gene_id + '.1'

                    atts['ID'] = mRNA_id
                    atts['Parent'] = gene_id

                    part_ordinal[mRNA_id] = 1

                elif feature in ('CDS', 'exon'):
                    if mRNA_id is None:
                        raise Exception("ERROR:  Found a {0} row before any mRNA/tRNA row; cannot assign it a parent.".format(feature))

                    cdsexon_id = mRNA_id + '.' + str(part_ordinal[mRNA_id])

                    atts['Parent'] = mRNA_id
                    atts['ID'] = 'CDS:' + cdsexon_id if feature == 'CDS' else cdsexon_id

                    # The exon and CDS of a pair may come in either order:
                    # the first one registers the id, the second one moves
                    # the transcript on to the next ordinal.
                    if cdsexon_id not in seen_part_ids:
                        seen_part_ids.add(cdsexon_id)
                    else:
                        part_ordinal[mRNA_id] += 1

                cols[8] = gff.build_column_9_from_dict(atts)

            outfh.write("\t".join(cols) + "\n")