コード例 #1
0
ファイル: val2err.py プロジェクト: pajanne/rococo
def main():
    """Turn ``val`` files of the current directory into ``err`` files.

    Every entry of the current directory whose name contains the ``-o``
    value is paired with a target name made of the text before its first
    ``.`` followed by the ``-n`` value.  The pairing is always printed; the
    ``grep ERROR`` extraction is only run when ``--extract`` is given.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="val EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="err EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--extract", help="Extract ERRORs only",
                      action="store_true", dest="extract")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both extensions are needed below: without -o the substring test raises
    # TypeError, and without -n the target would be named "<name>.None".
    if options.old is None or options.new is None:
        parser.error("both -o and -n EXTENSION must be given")

    for name in os.listdir('.'):
        # Substring match anywhere in the name, not only at its end
        if options.old not in name:
            continue
        newfile = "%s.%s" % (name.split(".")[0], options.new)
        print("Convert file %s into %s" % (name, newfile))
        if not options.extract:
            continue
        # NOTE(review): names are interpolated unquoted and the command uses
        # '>', so util.runProcess presumably goes through a shell - confirm
        # and quote if file names may contain spaces or metacharacters.
        cmd = "grep ERROR %s > %s" % (name, newfile)
        try:
            util.runProcess(cmd)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print("Error to extract %s" % name)
            print(e)
コード例 #2
0
ファイル: gendb.py プロジェクト: pajanne/rococo
def main():
    """Call ``doSubmit`` for every organism found in the ``-l`` list file.

    Useful lines look like ``common_name||sequence_file``.  Lines starting
    with ``!`` and lines without any ``||`` separator are skipped.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names and its associated sequence file", action="store", type="string", dest="list")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Read organism common name and related fasta sequence file
    list_file = options.list
    util.checkFile(list_file)
    # Context manager so the handle is closed even when doSubmit raises
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! common_name||sequence_file
            values = line.strip().split('||')
            common_name = values[0]
            # The sequence file is passed on without an existence check
            input_file = values[1]
            doSubmit(common_name, input_file)
コード例 #3
0
ファイル: gbk2embl.py プロジェクト: pajanne/rococo
def main():
    """Convert GenBank files of the current directory to EMBL with ``seqret``.

    Every entry of the current directory whose name contains the ``-o``
    value is paired with a target name made of the text before its first
    ``.`` followed by the ``-n`` value.  The pairing is always printed; the
    ``seqret`` conversion is only run when ``--convert`` is given.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="Old EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="New EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--convert", help="Do convert genbank file into embl",
                      action="store_true", dest="convert")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both extensions are needed below: without -o the substring test raises
    # TypeError, and without -n the target would be named "<name>.None".
    if options.old is None or options.new is None:
        parser.error("both -o and -n EXTENSION must be given")

    for name in os.listdir('.'):
        # Substring match anywhere in the name, not only at its end
        if options.old not in name:
            continue
        newfile = "%s.%s" % (name.split(".")[0], options.new)
        print("Convert file %s into %s" % (name, newfile))
        if not options.convert:
            continue
        # NOTE(review): file names are interpolated unquoted - confirm how
        # util.runProcess executes the command before relying on names that
        # contain spaces.
        cmd = "seqret -sequence gb::%s -feature Yes -outseq embl::%s" % (name, newfile)
        try:
            util.runProcess(cmd)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print("Error to convert %s" % name)
            print(e)
コード例 #4
0
ファイル: union.py プロジェクト: pajanne/rococo
def main():
    """Call ``union`` for every organism found in the ``-l`` list file.

    Useful lines look like
    ``common_name||organism_name||strain||locus_tag||fasta_file``.  Lines
    starting with ``!`` and lines without any ``||`` separator are skipped.
    Nothing is done when ``-l`` is not given.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names and its associated sequence file", action="store", type="string", dest="list")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related fasta sequence file
    list_file = options.list
    util.checkFile(list_file)
    # Context manager so the handle is closed even when union raises
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! common_name||organism_name||strain||locus_tag||fasta_file
            values = line.strip().split('||')
            print("Processing %s" % values[0])
            union(file=values[4],
                  common_name=values[0],
                  locus_tag=values[3],
                  organism_name=values[1],
                  strain=values[2])
コード例 #5
0
ファイル: embl_multiloader.py プロジェクト: pajanne/rococo
def main():
    """Load the EMBL file of every listed organism with ``chado_load``.

    Useful lines of the ``-l``/``--list`` file look like
    ``common_name||taxon_id||filename``.  Lines starting with ``!`` and
    lines without any ``||`` separator are skipped.  For each remaining
    line the file is checked with ``util.checkFile`` and a ``chado_load
    embl`` command is run through ``util.runProcess``.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE", help="FILE containing the list of all organism common names and its associated file to load", action="store", type="string", dest="list")
    # Declared so that optparse accepts -D; the connection settings are read
    # below through ropy.util.getDArg (presumably from the -D arguments -
    # confirm against ropy).
    parser.add_option("-D", action="store", dest="dbhost")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Without -l the loop below would end up calling open(None)
    if options.list is None:
        parser.error("-l/--list FILE must be given")

    # Print command line (every argument is followed by a single space)
    cmdline = "$ python " + "".join(arg + " " for arg in sys.argv)
    logger.info(cmdline)

    # Print logger file info
    logger.info(logsetup.logpath)

    # Setup database connection; no password is read, the loader command
    # below is built without one
    host = ropy.util.getDArg("dbhost", raiseOnEmpty = True)
    database = ropy.util.getDArg("dbname", raiseOnEmpty = True)
    port = ropy.util.getDArg("dbport", raiseOnEmpty = True)
    user = ropy.util.getDArg("dbuser", raiseOnEmpty = True)

    # Check if chado_load is installed
    util.isSoftInstalled("chado_load")

    # Read organism common name and load related embl file into the database
    data_path = options.list
    # Context manager so the handle is closed even when a load fails
    with open(data_path, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! common_name||taxon_id||filename
            fields = line.strip().split('||')
            common_name = fields[0]
            filename = fields[2]
            util.checkFile(filename)
            # Loader command
            cmd = "chado_load embl -o %s -t contig -D %s:%s/%s -U %s %s" % (common_name, host, port, database, user, filename)
            # Run command
            util.runProcess(cmd)
コード例 #6
0
ファイル: ebi_projectid.py プロジェクト: pajanne/rococo
def main():
    """Call ``doSubmit`` for every organism found in the ``-l`` list file.

    Useful lines look like
    ``organism_name||strain||locus_tag||seq_size||seq_depth||dna_source||description``.
    Lines starting with ``!`` and lines without any ``||`` separator are
    skipped.  The ``dna_source`` field is a short code that is expanded to a
    full description; a line with an unknown code is reported and skipped.
    The ``--submit`` flag is passed on to ``doSubmit`` unchanged.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names, its associated information", action="store", type="string", dest="list")
    parser.add_option("--submit", help="To submit data, not only checking locus_tag", action="store_true", dest="submit")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Full description for every DNA source code accepted in the list file
    dna_sources = {
        'GHP': 'Gut Health Programme, Rowett Institute of Nutrition and Health, University of Aberdeen. http://www.rowett.ac.uk/divisions/ghp/',
        'INRA': 'INRA Clermont-Ferrand-Theix. http://www.clermont.inra.fr/',
        'DSMZ': 'Deutsche Sammlung von Mikroorganismen und Zellkulturen. GmbH http://www.dsmz.de/',
        'NCTC': "Health Protection Agency's National Collection of Type Cultures. http://www.hpacultures.org.uk/",
    }

    # Get and check input arguments
    # Read organism names and their associated information
    list_file = options.list
    util.checkFile(list_file)
    # Context manager so the handle is closed even when doSubmit raises
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! organism_name||strain||locus_tag||seq_size||seq_depth||dna_source||description
            values = line.strip().split('||')
            organism_name = values[0]
            strain = values[1]
            locus_tag = values[2]
            seq_size = values[3]
            seq_depth = values[4]
            source_code = values[5]
            if source_code not in dna_sources:
                print("DNA source %s not found! Please provide details..." % source_code)
                continue
            dna_source = dna_sources[source_code]
            description = values[6]
            doSubmit(organism_name=organism_name, strain=strain, locus_tag=locus_tag,
                     seq_size=seq_size, seq_depth=seq_depth, dna_source=dna_source,
                     description=description, submit=options.submit)
コード例 #7
0
ファイル: rast.py プロジェクト: pajanne/rococo
def main():
    """Submit sequences or fetch results, for one organism or a whole list.

    With ``-l`` every useful line of the list file is processed; otherwise
    the single organism described by ``-o``/``-i``/``-j`` is processed.
    With ``--fetch`` the job ID is handed to ``doFetch``; without it the
    sequence file goes through ``checkValidInput`` and then ``doSubmit``.

    List lines look like ``common_name||sequence_file`` with the job ID as
    a third field when fetching.  Lines starting with ``!`` and lines
    without any ``||`` separator are skipped.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE", help="input organism sequence file in FASTA format", action="store", type="string", dest="input")
    parser.add_option("-j", metavar="ID", help="input job ID to fetch results", action="store", type="string", dest="jobid")
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names and its associated sequence file", action="store", type="string", dest="list")
    parser.add_option("--fetch", help="To fetch results, job id must be provided", action="store_true", dest="fetch")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if options.list:
        # Read organism common name and related fasta sequence file
        list_file = options.list
        util.checkFile(list_file)
        # Context manager so the handle is closed even when a job fails
        with open(list_file, "r") as handle:
            for line in handle:
                if line.startswith('!') or '||' not in line:
                    continue
                # ! common_name||sequence_file||job_id (job_id read only with --fetch)
                values = line.strip().split('||')
                common_name = values[0]
                if options.fetch:
                    job_id = values[2]
                    doFetch(common_name, job_id)
                else:
                    input_file = checkValidInput(values[1], common_name)
                    doSubmit(common_name, input_file)
    else:
        # Single organism taken from the command line options
        common_name = options.name
        if options.fetch:
            job_id = options.jobid
            doFetch(common_name, job_id)
        else:
            input_file = checkValidInput(options.input, common_name)
            doSubmit(common_name, input_file)
コード例 #8
0
ファイル: genbank.py プロジェクト: pajanne/rococo
def main():
    """Convert the EMBL file of every listed organism into a ``.tbl`` file.

    Only lines of the ``-l`` file holding exactly seven ``||``-separated
    fields
    (``common_name||organism_name||strain||locus_tag||genome_project_id||coverage||source``)
    are processed; lines starting with ``!`` and any other line are skipped.
    The input is ``../IMG/<common_name>.4dep.embl`` and the output
    ``<common_name>.tbl``.  The pairing is always logged; ``doConvert`` is
    only called when ``--convert`` is given.  Nothing is done without ``-l``.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    # The help lists all seven fields, matching the separator count enforced
    # below (it previously omitted the trailing "source" field)
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names and its associated information (common_name||organim_name||strain||locus_tag||genome_project_id||coverage||source)", action="store", type="string", dest="list")
    parser.add_option("--convert", help="Do convert embl file into tbl", action="store_true", dest="convert")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related locus tag
    list_file = options.list
    util.checkFile(list_file)
    # Context manager so the handle is closed even when a check fails
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!'):
                continue
            # Exactly six separators, i.e. seven fields, are required
            if line.count('||') != 6:
                continue
            # ! common_name||organism_name||strain||locus_tag||genome_project_id||coverage||source
            values = line.strip().split('||')
            common_name = values[0]
            locus_tag = values[3]

            embl_file = "../IMG/%s.4dep.embl" % common_name
            util.checkFile(embl_file)
            tbl_file = "%s.tbl" % common_name
            log.info("Convert file %s into %s" % (embl_file, tbl_file))
            if not options.convert:
                continue
            try:
                doConvert(embl_file, tbl_file, locus_tag)
            except Exception as e:
                log.error("Converting %s" % embl_file)
                # format_exc() reports the traceback of the exception being
                # handled; extract_stack() only described the current call
                # stack of main() itself
                log.error(traceback.format_exc())
                log.error(e)
コード例 #9
0
ファイル: gbk2tbl.py プロジェクト: pajanne/rococo
def main():
    """Convert the ``.img.embl`` file of every listed organism into ``.tbl``.

    Useful lines of the ``-l`` file look like
    ``common_name||organism_name||strain||locus_tag||fasta_file``.  Lines
    starting with ``!`` and lines without any ``||`` separator are skipped.
    The input is ``<common_name>.img.embl`` and the output
    ``<common_name>.tbl``.  The pairing is always printed; ``doConvert`` is
    only called when ``--convert`` is given.  Nothing is done without ``-l``.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names and its associated locus tag", action="store", type="string", dest="list")
    # NOTE(review): the help says "into embl" while the target built below
    # is a .tbl file - looks copied from another script; confirm the wording.
    parser.add_option("--convert", help="Do convert genbank file into embl", action="store_true", dest="convert")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if not options.list:
        return

    # Read organism common name and related locus tag
    list_file = options.list
    util.checkFile(list_file)
    # Context manager so the handle is closed even when a check fails
    with open(list_file, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! common_name||organism_name||strain||locus_tag||fasta_file
            values = line.strip().split('||')
            common_name = values[0]
            locus_tag = values[3]

            gbk_file = "%s.img.embl" % common_name
            util.checkFile(gbk_file)
            tbl_file = "%s.tbl" % common_name
            print("Convert file %s into %s" % (gbk_file, tbl_file))
            if not options.convert:
                continue
            try:
                doConvert(gbk_file, tbl_file, locus_tag)
            except Exception as e:
                # Best effort: report the failure and go on with the next line
                print("ERROR to convert %s" % gbk_file)
                print(e)
コード例 #10
0
ファイル: img.py プロジェクト: pajanne/rococo
def main():
    """Call ``doSubmit`` for one organism or for every organism of a list.

    With ``-l`` every useful line of the list file
    (``common_name||sequence_file||project_id``) is submitted; lines
    starting with ``!`` and lines without any ``||`` separator are skipped.
    Without ``-l`` the single organism described by ``-o``/``-i``/``-p`` is
    submitted.  The sequence file is checked with ``util.checkFile`` before
    each submission.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE", help="input organism sequence file in FASTA format", action="store", type="string", dest="input")
    parser.add_option("-p", metavar="ID", help="IMG project ID (GOLD Stamp ID)", action="store", type="string", dest="id")
    parser.add_option("-l", metavar="FILE", help="FILE containing the list of all organism common names, its associated sequence file and IMG project ID", action="store", type="string", dest="list")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Get and check input arguments
    if options.list:
        # Read organism common name and related fasta sequence file
        list_file = options.list
        util.checkFile(list_file)
        # Context manager so the handle is closed even when a submission fails
        with open(list_file, "r") as handle:
            for line in handle:
                if line.startswith('!') or '||' not in line:
                    continue
                # ! common_name||sequence_file||project_id
                values = line.strip().split('||')
                common_name = values[0]
                input_file = values[1]
                project_id = values[2]
                util.checkFile(input_file)
                doSubmit(common_name, input_file, project_id)
    else:
        # Single organism taken from the command line options
        common_name = options.name
        input_file = options.input
        project_id = options.id
        util.checkFile(input_file)
        doSubmit(common_name, input_file, project_id)
コード例 #11
0
ファイル: rename.py プロジェクト: pajanne/rococo
def main():
    """Rename files of the current directory to a new extension.

    Every entry of the current directory whose name contains the ``-o``
    value is paired with a target name made of the text before its first
    ``.`` followed by the ``-n`` value.  The pairing is always printed; the
    ``mv`` command is only run when ``--rename`` is given, otherwise a
    reminder is printed at the end.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="EXTENSION", help="Old EXTENSION",
                      action="store", type="string", dest="old")
    parser.add_option("-n", metavar="EXTENSION", help="New EXTENSION",
                      action="store", type="string", dest="new")
    parser.add_option("--rename", help="Do rename",
                      action="store_true", dest="rename")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both extensions are needed below: without -o the substring test raises
    # TypeError, and without -n files would really be renamed "<name>.None".
    if options.old is None or options.new is None:
        parser.error("both -o and -n EXTENSION must be given")

    for name in os.listdir('.'):
        # Substring match anywhere in the name, not only at its end
        if options.old not in name:
            continue
        newfile = "%s.%s" % (name.split(".")[0], options.new)
        print("Rename old file %s into %s" % (name, newfile))
        if options.rename:
            # NOTE(review): file names are interpolated unquoted - confirm
            # how util.runProcess executes the command before relying on
            # names that contain spaces.
            cmd = "mv %s %s" % (name, newfile)
            util.runProcess(cmd)
    if not options.rename:
        print("To perform the action, please use --rename")
コード例 #12
0
def main():
    """Load every organism of the ``-l``/``--list`` file into the database.

    Useful lines look like ``Genus||species||strain||taxonId``.  Lines
    starting with ``!`` and lines without any ``||`` separator are skipped.
    For each remaining line the organism is created with
    ``db.makeOrganism`` and its translation table, taxon ID and HTML names
    are stored with ``db.makeOrganismProp``; every result is logged.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE", help="FILE containing the list of all organisms", action="store", type="string", dest="list")
    # Declared so that optparse accepts -D; the connection settings are read
    # below through ropy.util.getDArg (presumably from the -D arguments -
    # confirm against ropy).
    parser.add_option("-D", action="store", dest="dbhost")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Without -l the loop below would end up calling open(None)
    if options.list is None:
        parser.error("-l/--list FILE must be given")

    # Print command line (every argument is followed by a single space)
    cmdline = "$ python " + "".join(arg + " " for arg in sys.argv)
    logger.info(cmdline)

    # Print logger file info
    logger.info(logsetup.logpath)

    # Setup database connection
    host = ropy.util.getDArg("dbhost", raiseOnEmpty = True)
    database = ropy.util.getDArg("dbname", raiseOnEmpty = True)
    port = ropy.util.getDArg("dbport", raiseOnEmpty = True)
    user = ropy.util.getDArg("dbuser", raiseOnEmpty = True)
    password = ropy.util.getDArg("dbpassword")
    connectionFactory = ropy.query.ConnectionFactory(host, database, user, password, port)

    # Read organism list file and load it into the database
    data_path = options.list
    # Context manager so the handle is closed even when a database call fails
    with open(data_path, "r") as handle:
        for line in handle:
            if line.startswith('!') or '||' not in line:
                continue
            # ! Genus||species||strain||taxonId
            fields = line.strip().split('||')
            genus = fields[0]
            species = fields[1].replace('sp.', 'unknown')
            strain = fields[2]
            taxonid = fields[3]

            # Load organism; the common name doubles as the abbreviation
            chado_species = "%s (%s)" % (species, strain)
            common_name = getCommonName(genus, species, strain)
            abbreviation = common_name
            comment = None
            logger.info(common_name)
            logger.info(db.makeOrganism(connectionFactory, genus, chado_species, abbreviation, common_name, comment))

            # Load translation table
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species, "genedb_misc", "translationTable", 11))

            # Load taxonomy id
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species, "genedb_misc", "taxonId", taxonid))

            # Load HTML name fields for GeneDB web
            htmlFullName = getHtmlFullName(genus, species, strain)
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species, "genedb_misc", "htmlFullName", htmlFullName))
            htmlShortName = getHtmlShortName(genus, species, strain)
            logger.info(db.makeOrganismProp(connectionFactory, genus, chado_species, "genedb_misc", "htmlShortName", htmlShortName))