Python print_tbl_from_assemblies Examples, biocodetbl.print_tbl_from_assemblies Python Examples

Example #1

0

Show file

File: convert_gff3_to_ncbi_tbl.py Project: pombredanne/biocode

def main():
    """Convert a GFF3 file into an NCBI TBL file plus a FASTA file.

    Options are parsed from the command line with argparse.  Output goes to
    ``<output_base>.tbl`` and ``<output_base>.fna``.

    If no assembly ID already starts with ``gnl|WGS:``, every assembly is
    re-keyed, and its ``id`` attribute reset, to
    ``gnl|WGS:<prefix>|SeqID|gb|<prefix>01<six-digit counter>``, where
    ``<prefix>`` is ``--ncbi_acc_prefix``.  If any ID is already in that
    form, the assemblies are left completely untouched.
    """
    parser = argparse.ArgumentParser( description='Create a TBL file for submission to NCBI from GFF3')

    ## input/output locations and NCBI submission settings
    parser.add_argument('-i', '--input_file', type=str, required=True, help='Path to an input file to be read' )
    parser.add_argument('-o', '--output_base', type=str, required=True, help='Base name of output files to be created' )
    parser.add_argument('-ln', '--lab_name', type=str, required=True, help='Required by NCBI to identify the submitting group' )
    parser.add_argument('-nap', '--ncbi_acc_prefix', type=str, required=True, help='Required and assigned by NCBI' )
    parser.add_argument('-gf', '--genomic_fasta', type=str, required=False, help='FASTA file of genomic sequence, if not embedded in GFF' )
    parser.add_argument('-go', '--go_obo', type=str, required=False, help='GO terms will not be exported unless you pass the path to a GO OBO file')
    
    args = parser.parse_args()

    # Only the assemblies are used below; the second returned value is ignored.
    (assemblies, _features) = biocodegff.get_gff3_features( args.input_file )
    
    if args.genomic_fasta is not None:
        biocodeutils.add_assembly_fasta(assemblies, args.genomic_fasta)

    ## We need to first check the ID format
    # pre-formatted IDs are like this: gnl|WGS:XXXX|SeqID|gb|XXXX01xxxxxx
    # Decide BEFORE touching any assembly: rewriting ids while scanning would
    # leave earlier assemblies with a new .id under their old dict key if a
    # pre-formatted ID turned up later in the iteration.
    reformat_IDs = not any(asm_id.startswith('gnl|WGS:') for asm_id in assemblies)

    if reformat_IDs:
        ## re-key old IDs (like tp.assembly.567468735.1) to new ones (like AAGK01000001)
        new_assemblies = dict()

        for asm_num, asm_id in enumerate(assemblies, start=1):
            new_id = "gnl|WGS:{0}|SeqID|gb|{0}01{1:06d}".format(args.ncbi_acc_prefix, asm_num)
            assembly = assemblies[asm_id]
            assembly.id = new_id
            new_assemblies[new_id] = assembly

        assemblies = new_assemblies

    # >gi|68352484|gb|AAGK01000001.1|
    # AAGK01000001	NC_007344.1	tp.assembly.567468735.1

    # The context manager closes (and flushes) the TBL file even if the export raises.
    with open("{0}.tbl".format(args.output_base), 'wt') as ofh:
        biocodetbl.print_tbl_from_assemblies(assemblies=assemblies, ofh=ofh, go_obo=args.go_obo, lab_name=args.lab_name)

    mset = biothings.AssemblySet()
    mset.load_from_dict(assemblies)
    mset.write_fasta(path="{0}.fna".format(args.output_base))

Example #2

0

Show file

File: convert_gff3_to_ncbi_tbl.py Project: stefanoliver/biocode

def main():
    """Write an NCBI TBL file for the assemblies found in a GFF3 file.

    Options are parsed from the command line with argparse.  The assemblies
    read from ``--input_file`` are passed to
    ``biocodetbl.print_tbl_from_assemblies`` together with a handle opened on
    ``--output_file``.
    """
    # The description previously held the unedited script-template placeholder.
    parser = argparse.ArgumentParser( description='Create a TBL file for submission to NCBI from GFF3')

    ## input file, output file and export settings
    parser.add_argument('-i', '--input_file', type=str, required=True, help='Path to an input file to be read' )
    parser.add_argument('-o', '--output_file', type=str, required=True, help='Path to an output file to be created' )
    parser.add_argument('-go', '--go_obo', type=str, required=False, help='GO terms will not be exported unless you pass the path to a GO OBO file')
    parser.add_argument('-ln', '--lab_name', type=str, required=True, help='Required by NCBI to identify the submitting group' )
    args = parser.parse_args()

    # Only the assemblies are used below; the second returned value is ignored.
    (assemblies, _features) = biocodegff.get_gff3_features( args.input_file )
    
    # The context manager closes (and flushes) the output file even if the export raises.
    with open(args.output_file, 'wt') as ofh:
        biocodetbl.print_tbl_from_assemblies(assemblies=assemblies, ofh=ofh, go_obo=args.go_obo, lab_name=args.lab_name)

Example #3

0

Show file

File: convert_gff3_to_ncbi_tbl.py Project: yuzhenpeng/biocode

def main():
    """Convert a GFF3 file into an NCBI TBL file plus a FASTA file.

    Options are parsed from the command line with argparse.  Output goes to
    ``<output_base>.tbl`` and ``<output_base>.fna``.

    If no assembly ID already starts with ``gnl|WGS:``, every assembly is
    re-keyed, and its ``id`` attribute reset, to
    ``gnl|WGS:<prefix>|SeqID|gb|<prefix>01<six-digit counter>``, where
    ``<prefix>`` is ``--ncbi_acc_prefix``.  If any ID is already in that
    form, the assemblies are left completely untouched.
    """
    parser = argparse.ArgumentParser(
        description='Create a TBL file for submission to NCBI from GFF3')

    ## input/output locations and NCBI submission settings
    parser.add_argument('-i',
                        '--input_file',
                        type=str,
                        required=True,
                        help='Path to an input file to be read')
    parser.add_argument('-o',
                        '--output_base',
                        type=str,
                        required=True,
                        help='Base name of output files to be created')
    parser.add_argument(
        '-ln',
        '--lab_name',
        type=str,
        required=True,
        help='Required by NCBI to identify the submitting group')
    parser.add_argument('-nap',
                        '--ncbi_acc_prefix',
                        type=str,
                        required=True,
                        help='Required and assigned by NCBI')
    parser.add_argument(
        '-gf',
        '--genomic_fasta',
        type=str,
        required=False,
        help='FASTA file of genomic sequence, if not embedded in GFF')
    parser.add_argument(
        '-go',
        '--go_obo',
        type=str,
        required=False,
        help=
        'GO terms will not be exported unless you pass the path to a GO OBO file'
    )

    args = parser.parse_args()

    # Only the assemblies are used below; the second returned value is ignored.
    (assemblies, _features) = biocodegff.get_gff3_features(args.input_file)

    if args.genomic_fasta is not None:
        biocodeutils.add_assembly_fasta(assemblies, args.genomic_fasta)

    ## We need to first check the ID format
    # pre-formatted IDs are like this: gnl|WGS:XXXX|SeqID|gb|XXXX01xxxxxx
    # The decision is made up front: rewriting ids during the scan would leave
    # already-visited assemblies with a new .id under their old dict key when
    # a pre-formatted ID is met later on.
    already_formatted = any(
        asm_id.startswith('gnl|WGS:') for asm_id in assemblies)

    if not already_formatted:
        ## re-key old IDs (like tp.assembly.567468735.1) to new ones (like AAGK01000001)
        renamed = dict()

        for asm_num, asm_id in enumerate(assemblies, start=1):
            new_id = "gnl|WGS:{0}|SeqID|gb|{0}01{1:06d}".format(
                args.ncbi_acc_prefix, asm_num)
            assembly = assemblies[asm_id]
            assembly.id = new_id
            renamed[new_id] = assembly

        assemblies = renamed

    # The context manager closes (and flushes) the TBL file even if the
    # export raises.
    with open("{0}.tbl".format(args.output_base), 'wt') as ofh:
        biocodetbl.print_tbl_from_assemblies(assemblies=assemblies,
                                             ofh=ofh,
                                             go_obo=args.go_obo,
                                             lab_name=args.lab_name)

    mset = biothings.AssemblySet()
    mset.load_from_dict(assemblies)
    mset.write_fasta(path="{0}.fna".format(args.output_base))