Exemples Python de column_9_dict (biocode.gff.column_9_dict)

Exemple #1

0

Afficher le fichier

Fichier : merge_predicted_gff3.py Projet : qclian/biocode

def add_barrnap_features(assemblies, features, barrnap_gff):
    """Add the rRNA predictions of a Barrnap GFF file to the given collections.

    Comment lines (starting with '#') and rows that do not have exactly nine
    tab-separated columns are ignored.  For every remaining row the assembly
    named in column 1 is looked up in ``assemblies``; if it is missing, a new
    ``things.Assembly`` with empty residues is created and registered.  Rows
    whose type (column 3) is ``rRNA`` additionally produce a gene and a child
    rRNA feature, both located on that assembly and both stored in
    ``features`` under freshly generated UUID-based IDs.

    Args:
        assemblies: dict of assembly ID -> assembly object; updated in place.
        features: dict of feature ID -> feature object; updated in place.
        barrnap_gff: path to the GFF file to read.
    """
    # map the GFF strand symbol onto the numeric strand; anything else is 0
    strand_of = {'-': -1, '+': 1}

    # the context manager guarantees the handle is closed, even on error
    with open(barrnap_gff) as gff_fh:
        for line in gff_fh:
            if line.startswith('#'):
                continue

            # drop only the line terminator so it does not end up inside the
            # column-9 text handed to the annotation parser
            cols = line.rstrip("\r\n").split("\t")

            if len(cols) != 9:
                continue

            if cols[0] in assemblies:
                current_assembly = assemblies[cols[0]]
            else:
                current_assembly = things.Assembly(id=cols[0], residues='')
                assemblies[cols[0]] = current_assembly

            if cols[2] != 'rRNA':
                continue

            feat_base = "rRNA_{0}".format(uuid.uuid4())
            gene_id = "{0}_gene".format(feat_base)
            rRNA_id = "{0}_rRNA".format(feat_base)

            # the start coordinate is shifted down by one, the end is used
            # as-is (presumably 1-based GFF -> 0-based interbase; confirm
            # against the things.locate_on contract)
            rfmin = int(cols[3]) - 1
            rfmax = int(cols[4])
            rstrand = strand_of.get(cols[6], 0)

            gene = things.Gene(id=gene_id)
            gene.locate_on(target=current_assembly,
                           fmin=rfmin,
                           fmax=rfmax,
                           strand=rstrand)
            features[gene_id] = gene
            current_assembly.add_gene(gene)

            # the rRNA shares the exact coordinates of its parent gene
            rRNA = things.rRNA(id=rRNA_id, parent=gene)
            rRNA.locate_on(target=current_assembly,
                           fmin=rfmin,
                           fmax=rfmax,
                           strand=rstrand)
            gene.add_rRNA(rRNA)
            rRNA.annotation = gff.parse_annotation_from_column_9(cols[8])
            features[rRNA_id] = rRNA

Exemple #2

0

Afficher le fichier

Fichier : update_selected_column9_values.py Projet : jorvis/biocode

def main():
    """Batch-update one column-9 attribute of a GFF3 file.

    Reads a two-column, tab-separated update file (feature identifier, new
    value).  Every 9-column row of the input GFF whose ``--key`` attribute
    (default ``ID``) matches an identifier from the update file gets its
    ``--attribute`` set to the corresponding value.  When ``--type`` is given,
    only rows whose third column equals it are considered.  All rows are
    written to the output file, with trailing whitespace stripped.

    Raises:
        Exception: if the input and output paths refer to the same file.
    """
    import os

    parser = argparse.ArgumentParser( description='Updates 9th-column key/value pairs in GFF file using a batch-update file')

    ## output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file' )
    parser.add_argument('-u', '--update_file', type=str, required=True, help='A two-column file (FeatureID, value)' )
    parser.add_argument('-a', '--attribute', type=str, required=True, help='The attribute value to update' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-k', '--key', type=str, required=False, default='ID', help='Which key in the 9th column helps identify the row to be updated?' )
    parser.add_argument('-t', '--type', type=str, required=False, help='Filter rows updated based on the 3rd (type) column' )
    args = parser.parse_args()

    # protect the user: compare resolved paths so that aliases of the same
    # file (e.g. 'a.gff' vs './a.gff') are caught before the output is truncated
    if os.path.realpath(args.input_file) == os.path.realpath(args.output_file):
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    # first read in the values to be updated
    changes = dict()

    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")
            if len(cols) != 2:
                print("WARNING: Skipping the following update line because two columns were expected:\n{0}".format(line))
                continue

            changes[cols[0]] = cols[1]

    # both handles are closed (and the output flushed) when the block exits
    with open(args.input_file) as infh, open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            outfh.write("\t".join(cols) + "\n")

Exemple #3

0

Afficher le fichier

Fichier : append_to_column_9_value.py Projet : yangjie4546/biocode

def main():
    """Append text to the value of one column-9 attribute in a GFF file.

    For every 9-column row (optionally restricted to rows whose third column
    equals ``--feature_type``) that carries the ``--key`` attribute, the
    ``--appended_text`` is appended to that attribute's value.  Every line of
    the input, modified or not, is written to the output with trailing
    whitespace stripped.  The number of modified rows is printed at the end.

    Raises:
        Exception: if the input and output paths refer to the same file.
    """
    import os

    parser = argparse.ArgumentParser( description='Put a description of your script here')

    ## output file to be written
    parser.add_argument('-i', '--input_file', type=str, required=True, help='Path to an input file to be read' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-t', '--feature_type', type=str, required=False, help='Only update features of a given type (gff column 3)' )
    parser.add_argument('-k', '--key', type=str, required=True, help='Which column 9 key/attribute to update values for?' )
    parser.add_argument('-a', '--appended_text', type=str, required=True, help='Text to append' )
    args = parser.parse_args()

    # protect the user: compare resolved paths so that aliases of the same
    # file (e.g. 'a.gff' vs './a.gff') are caught before the output is truncated
    if os.path.realpath(args.input_file) == os.path.realpath(args.output_file):
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    replacement_count = 0

    # both handles are closed (and the output flushed) when the block exits
    with open(args.input_file) as ifh, open(args.output_file, 'wt') as ofh:
        for line in ifh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.feature_type is None or args.feature_type == cols[2]):
                col9 = gff.column_9_dict(cols[8])
                if args.key in col9:
                    col9[args.key] = "{0}{1}".format(col9[args.key], args.appended_text)
                    cols[8] = gff.build_column_9_from_dict(col9)
                    replacement_count += 1

            # untouched rows re-join to exactly the stripped input line, so a
            # single write covers the modified and the pass-through cases
            ofh.write("\t".join(cols) + "\n")

    print("INFO: Made {0} replacements in the file".format(replacement_count))

Exemple #4

0

Afficher le fichier

def main():
    """Batch-update one column-9 attribute of a GFF3 file.

    Reads a two-column, tab-separated update file (feature identifier, new
    value).  Every 9-column row of the input GFF whose ``--key`` attribute
    (default ``ID``) matches an identifier from the update file gets its
    ``--attribute`` set to the corresponding value.  When ``--type`` is given,
    only rows whose third column equals it are considered.  All rows are
    written to the output file, with trailing whitespace stripped.

    Raises:
        Exception: if the input and output paths refer to the same file.
    """
    import os

    parser = argparse.ArgumentParser(
        description=
        'Updates 9th-column key/value pairs in GFF file using a batch-update file'
    )

    ## output file to be written
    parser.add_argument('-i',
                        '--input_file',
                        type=str,
                        required=True,
                        help='A GFF3 file')
    parser.add_argument('-u',
                        '--update_file',
                        type=str,
                        required=True,
                        help='A two-column file (FeatureID, value)')
    parser.add_argument('-a',
                        '--attribute',
                        type=str,
                        required=True,
                        help='The attribute value to update')
    parser.add_argument('-o',
                        '--output_file',
                        type=str,
                        required=True,
                        help='Path to an output file to be created')
    parser.add_argument(
        '-k',
        '--key',
        type=str,
        required=False,
        default='ID',
        help='Which key in the 9th column helps identify the row to be updated?'
    )
    parser.add_argument(
        '-t',
        '--type',
        type=str,
        required=False,
        help='Filter rows updated based on the 3rd (type) column')
    args = parser.parse_args()

    # protect the user: compare resolved paths so that aliases of the same
    # file (e.g. 'a.gff' vs './a.gff') are caught before the output is truncated
    if os.path.realpath(args.input_file) == os.path.realpath(
            args.output_file):
        raise Exception(
            "ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things."
        )

    # first read in the values to be updated
    changes = dict()

    with open(args.update_file) as update_fh:
        for line in update_fh:
            cols = line.rstrip().split("\t")
            if len(cols) != 2:
                print(
                    "WARNING: Skipping the following update line because two columns were expected:\n{0}"
                    .format(line))
                continue

            changes[cols[0]] = cols[1]

    # both handles are closed (and the output flushed) when the block exits
    with open(args.input_file) as infh, \
            open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            row_selected = len(cols) == 9 and (args.type is None
                                               or args.type == cols[2])
            if row_selected:
                atts = gff.column_9_dict(cols[8])

                if args.key in atts and atts[args.key] in changes:
                    atts[args.attribute] = changes[atts[args.key]]
                    cols[8] = gff.build_column_9_from_dict(atts)

            outfh.write("\t".join(cols) + "\n")

Exemple #5

0

Afficher le fichier

Fichier : rename_id_gff3.py Projet : AllanSSX/Scratch

def main():
    """Rewrite the IDs of gene, transcript, CDS and exon rows of a GFF3 file.

    Only 9-column rows are processed (optionally restricted to rows whose
    third column equals ``--type``); every line is written to the output with
    trailing whitespace stripped.  For processed rows:

    * any ``Name`` attribute is removed;
    * ``gene`` rows get the ID ``<seqid>_<counter>``, where the zero-padded,
      six-digit counter restarts for each new sequence ID and advances by 10
      per gene; the old and new IDs are printed;
    * ``mRNA`` / ``tRNA`` rows get ``<gene id>.1`` (no isoforms assumed) and
      are parented to the most recent gene;
    * ``CDS`` / ``exon`` rows get ``<transcript id>.<n>`` (prefixed with
      ``CDS:`` for CDS rows) and are parented to the most recent transcript.

    Raises:
        Exception: if the input and output paths refer to the same file.
        ValueError: if a transcript row appears before any gene row, or a
            CDS/exon row appears before any transcript row.
    """
    import os

    parser = argparse.ArgumentParser( description='Updates 9th-column key/value pairs in GFF file using a batch-update file')

    parser.add_argument('-i', '--input_file', type=str, required=True, help='A GFF3 file' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-t', '--type', type=str, required=False, help='Filter rows updated based on the 3rd (type) column' )

    args = parser.parse_args()

    # opening the output truncates it, so refuse to overwrite the input
    if os.path.realpath(args.input_file) == os.path.realpath(args.output_file):
        raise Exception("ERROR:  Don't set --input_file and --output_file to be the same thing.  Bad things will happen.  Bad Things.")

    seen_seqids = set()
    cdsexon_count = {}
    incr = 0
    gene_id = None
    mRNA_id = None

    # both handles are closed (and the output flushed) when the block exits
    with open(args.input_file) as infh, open(args.output_file, 'wt') as outfh:
        for line in infh:
            cols = line.rstrip().split("\t")

            if len(cols) == 9 and (args.type is None or args.type == cols[2]):
                seqid = cols[0]
                feature = cols[2]

                # restart the gene counter for each new chromosome
                if seqid not in seen_seqids:
                    seen_seqids.add(seqid)
                    incr = 0

                # split col 9
                atts = gff.column_9_dict(cols[8])
                atts.pop('Name', None)

                # change values
                if feature == 'gene':
                    # genes are numbered in steps of 10: 000010, 000020, ...
                    incr += 10
                    old_id = atts['ID']

                    gene_id = seqid + '_' + format(incr, '06d')
                    atts['ID'] = gene_id

                    print(old_id, gene_id)

                # assume no isoforms
                elif feature in ['mRNA', 'tRNA']:
                    if gene_id is None:
                        raise ValueError("ERROR: found a {0} row before any gene row:\n{1}".format(feature, line))

                    mRNA_id = gene_id + '.1'

                    atts['ID'] = mRNA_id
                    atts['Parent'] = gene_id

                    cdsexon_count[mRNA_id] = 1

                elif feature in ['CDS', 'exon']:
                    if mRNA_id is None:
                        raise ValueError("ERROR: found a {0} row before any mRNA/tRNA row:\n{1}".format(feature, line))

                    cdsexon_id = mRNA_id + '.' + str(cdsexon_count[mRNA_id])

                    atts['Parent'] = mRNA_id
                    atts['ID'] = cdsexon_id

                    if feature == 'CDS':
                        atts['ID'] = 'CDS:' + cdsexon_id

                    # allow exon / cds switch position: the first row seen for
                    # a number only registers it; the second row with the same
                    # number advances the transcript's counter, so an exon and
                    # its CDS share a number whichever comes first
                    if cdsexon_id not in cdsexon_count:
                        cdsexon_count[cdsexon_id] = None
                    else:
                        cdsexon_count[mRNA_id] += 1

                cols[8] = gff.build_column_9_from_dict(atts)

            outfh.write("\t".join(cols) + "\n")