Beispiel #1
0
def transferFeatures(hits):
    """
    Log, as EMBL-style qualifiers, the annotation found in geneDB for each hit.

    For every key of `hits`, the associated value is passed as the single
    parameter of two SQL queries (loaded from the sql/ directory next to this
    module) and every returned row is written to the debug log:

    In table: feature_cvterm
    RILEY              /class
    genedb_products    /product

    In table: featureprop
    EC_number          /EC_number
    colour             /colour
    gene               /gene

    hits -- mapping; each key is logged as "--- <key>" and each value is
            logged as /ortholog and used as the query parameter
            (presumably a chado feature_id -- confirm against the SQL files).
    """
    # Connect to geneDB as read only user using ropy.query
    query = ropy.query.QueryProcessor(connection=connectionFactory)
    # Resolve the module directory to an absolute path first: with a bare
    # relative __file__ (e.g. "script.py") dirname() is "" and the plain
    # concatenation would point at the filesystem root ("/sql/").
    module_dir = os.path.dirname(os.path.abspath(__file__))
    query.setSQLFilePath(module_dir + "/sql/")

    # The query definitions do not depend on the hit: register them once
    # instead of re-registering both of them on every iteration.
    query.addQueryFromFile("feature_cvterm_query", "get_cvterm_from_feature_cvterm.sql")
    query.addQueryFromFile("featureprop_query", "get_cvterm_from_featureprop.sql")

    # cv name -> qualifier name used when logging feature_cvterm rows;
    # rows from any other cv are ignored.
    qualifier_by_cv = {"RILEY": "class", "genedb_products": "product"}

    for hit in hits:
        ortholog = hits[hit]
        # Extract all cvterm related to a feature_id from feature_cvterm table
        feature_cvterm_rows = query.runQuery("feature_cvterm_query", (ortholog,))
        logger.debug("--- %s", hit)
        logger.debug('/ortholog="%s"', ortholog)
        for row in feature_cvterm_rows:
            cvterm_name = row[0]
            cv_name = row[1]
            qualifier = qualifier_by_cv.get(cv_name)
            if qualifier is not None:
                logger.debug('/%s="%s"', qualifier, cvterm_name)
        # Extract all cvterm related to a feature_id from featureprop
        featureprop_rows = query.runQuery("featureprop_query", (ortholog,))
        for row in featureprop_rows:
            logger.debug('/%s="%s"', row[0], row[1])
    logger.info("Features transfered")
async def on_ready():
    """
    Discord "ready" handler.

    Logs the guilds and the connected user, gives every tracked video a
    Discord notifier bound to its channel id, then awaits chats.main().
    """
    logger.debug(client.guilds)
    logger.info(f"{client.user} has connected to Discord!")

    # The send callbacks can only be bound once the Discord client is
    # initialised, so they are overwritten here.
    for video in chats.videos:
        channel_id = int(video.chid)
        video.send = discord_notify(channel_id)

    await chats.main()
 def __init__(self, state=False, state_file="./state", **kwargs):
     """
     Start with an empty video list and optionally restore saved state.

     state      -- when truthy, load_state(**kwargs) is called right away
     state_file -- stored on the instance; named in the debug message as
                   the place the state is saved to
     kwargs     -- forwarded unchanged to load_state()
     """
     self.videos = []
     # Remember whether (and where) the list of video ids is persisted
     self.state, self.state_file = state, state_file
     if not state:
         return
     self.load_state(**kwargs)
     logger.debug(f"State will save to {self.state_file} while checking")
async def on_message(message):
    """
    Handle a ``.synchat`` command posted in a Discord channel.

    Messages written by the bot itself, or not starting with ``.synchat``,
    are ignored.  A bare ``.synchat`` (or arguments the parser rejects) is
    answered with the parser's help text.  Supported methods:

    list  -- reply with the video ids synced to this channel
    start -- register the video via chats.add_video() and report OK/Fail
    stop  -- unregister the video via chats.remove_video() and report the result

    Any other method is answered with "<method> not implemented".
    """
    # Only read commands, excluding the bot itself
    if message.author == client.user:
        return
    if not message.content.startswith(".synchat"):
        return

    # No arguments: answer with the usage text
    if not message.content.startswith(".synchat "):
        await message.channel.send("```" + parser.format_help() + "```")
        return

    # Read command and video id
    logger.debug(message.content)
    try:
        args = parser.parse_args(message.content.split()[1:])
    except (SystemExit, Exception) as e:
        # argparse reports invalid input by calling sys.exit(), so SystemExit
        # has to be caught here (Python 3.9 offers exit_on_error=False).
        # BaseException is deliberately NOT caught, so KeyboardInterrupt and
        # GeneratorExit still propagate.
        logger.warning(str(type(e)) + str(e))
        await message.channel.send("```" + parser.format_help() + "```")
        return

    # Local name avoids shadowing the builtin id()
    method, video_id = args.method, args.id
    dc_channel = message.channel.id

    # List the videos monitored for this channel
    if method == "list":
        ids = [v.ytid for v in chats.videos if v.chid == str(dc_channel)]
        await message.channel.send("sync list: " + ",".join(ids))
        return

    # The id cannot be null if the user wants to start or stop the chat
    if video_id is None:
        await message.channel.send("Fail: No video ID provieded")
        return

    # Start to monitor
    if method == "start":
        logger.info(f"Sync {video_id} to {dc_channel}")
        if chats.add_video(video_id,
                           dc_channel,
                           discord_notify(dc_channel),
                           save=True,
                           chat_folder=chat_folder):
            await message.channel.send(f"OK {video_id}")
        else:
            await message.channel.send(f"Fail to add {video_id}")

    # Stop monitoring
    elif method == "stop":
        ok = await chats.remove_video(video_id, dc_channel)
        if ok:
            await message.channel.send("OK")
        else:
            await message.channel.send(f"No {video_id} found")
    else:
        await message.channel.send(f"{method} not implemented")
    async def post(self, chatdata):
        """
        Process one batch of chat items.

        Each item in chatdata.items is appended as a JSON line to
        "<folder><id>.data" when self.save is set, and is passed to
        self.send() unless it is a plain "textMessage" while
        self.normal_msg is falsy.  When self.live is set,
        chatdata.tick_async() is awaited after every item.
        """
        for item in chatdata.items:
            if self.save:
                data_path = self.folder + self.id + ".data"
                with open(data_path, "a") as out:
                    out.write(item.json() + "\n")

            # Non-text items are always forwarded; text ones only on request
            forward = self.normal_msg if item.type == "textMessage" else True
            if forward:
                logger.debug("post")
                await self.send(item)

            if self.live:
                await chatdata.tick_async()
 async def remove_video(self, id, channel=""):
     """
     Close and drop every tracked chat whose id matches.

     When `channel` is truthy the id searched for is "<channel>.<id>",
     otherwise `id` is used as given.  Returns True if at least one chat
     was removed, False (leaving self.videos untouched) if none matched.
     """
     target = str(channel) + "." + id if channel else id
     kept = []
     for chat in self.videos:
         if chat.id != target:
             kept.append(chat)
             continue
         logger.debug(f"Remove {chat.id}")
         await chat.close()
     # Same length means nothing matched, so there is nothing to update
     if len(kept) == len(self.videos):
         return False
     self.videos = kept
     return True
Beispiel #7
0
def splitSeqWithBiopython(embl, type):
    """
    Extract the nucleotide sequence of every CDS feature of an EMBL file
    using Biopython.

    The sequences/ directory is created, but as written each CDS is only
    emitted to the debug log in FASTA format; no per-sequence file is
    written by this function.

    embl -- path of an EMBL file holding a single record; the program exits
            if that record has no features
    type -- not used by the body; only 'CDS' features are processed
    """
    util.checkFile(embl)
    # Create directory sequences/
    dirname = "sequences/"
    util.createDir(dirname)
    # Use a context manager so the handle is closed once the record is parsed
    # (it used to be left open).
    # NOTE(review): the "U" flag is rejected by Python 3.11+; kept as-is for
    # the Python 2 style universal-newline behaviour -- confirm target version.
    with open(embl, "rU") as handle:
        record = SeqIO.read(handle, "embl")
    if not record.features:
        sys.exit("ERROR: EMBL file %s without features" % embl)
    seq = record.seq
    for feature in record.features:
        if feature.type != 'CDS':
            continue

        # Build up a list of (start,end) tuples that will be used to slice the sequence
        if feature.sub_features:
            # Sub-features mean this gene is made up of multiple parts
            locations = [(sf.location.start.position, sf.location.end.position)
                         for sf in feature.sub_features]
        else:
            # This gene is made up of one part
            locations = [(feature.location.start.position, feature.location.end.position)]

        # Join the slices forming the CDS; str() replaces Seq.tostring()
        seq_str = ''.join(str(seq[begin:end]) for begin, end in locations)

        # Reverse complement the sequence if the CDS is on the minus strand
        if feature.strand == -1:
            seq_obj = Seq(seq_str, IUPAC.ambiguous_dna)
            seq_str = str(seq_obj.reverse_complement())

        logger.debug(feature)
        logger.debug(SeqRecord(seq=Seq(seq_str), id=feature.qualifiers['systematic_id'][0], description=feature.type).format('fasta'))

    logger.info("Sequences extracted into %s" % dirname)
    async def send(c):
        """
        Post `c` to the Discord channel captured from the enclosing scope.

        A plain string is sent as-is.  Anything else is treated as a chat
        item and rendered as an embed: author name as title, the item's
        background colour, the message text (prefixed by the amount string,
        or bracketed for non-text items), the item's timestamp and the
        author's image as thumbnail.
        """
        # Local import: only this function needs the UTC tzinfo
        from datetime import timezone

        if isinstance(c, str):
            await client.get_channel(channel).send(c)
            return

        logger.debug(f"Print data: {str(c.json())}")
        # If new member: message = join message
        if c.amountString:
            text = f"[{c.amountString}]\n{c.message}"
        elif c.type != "textMessage":
            text = f"[{c.message}]"
        else:
            text = f"{c.message}"
        # c.timestamp is divided by 1000 before conversion (i.e. treated as
        # milliseconds).  Build a timezone-aware UTC datetime: the naive value
        # from the deprecated utcfromtimestamp() can be misread as local time.
        dtime = datetime.fromtimestamp(c.timestamp / 1000, tz=timezone.utc)
        # name, color (ARGB reduced to its low 24 bits) and time
        embed = discord.Embed(title=c.author.name,
                              colour=c.bgColor % 0x1000000,
                              description=text,
                              timestamp=dtime)
        # thumbnail
        embed.set_thumbnail(url=c.author.imageUrl)
        # send
        await client.get_channel(channel).send(embed=embed)
async def console_print(c):
    """
    Console notifier: log `c` instead of sending it anywhere.

    Strings (including str subclasses) are logged at INFO level; any other
    object is expected to provide json(), whose result is logged at DEBUG.
    """
    if isinstance(c, str):
        logger.info(c)
        return
    logger.debug(f"Print data: {str(c.json())}")
Beispiel #10
0
def _buildOptionParser():
    """Return the OptionParser declaring every command-line option of main()."""
    # Fasta file extension:
    # .ffn for the untranslated nucleotide sequences for each CDS; .faa for protein coding sequences (CDS)
    # .fa for the fasta alignment results
    # .fna for whole genomic DNA sequences; .frn for nucleotide sequences of RNA related features
    parser = OptionParser(usage="usage: %prog [Options]")
    parser.add_option("-d", "--dna", metavar="FILE", help="input dna FILE in fasta format", action="store", type="string", dest="dna")
    parser.add_option("-t", "--tab", metavar="FILE", help="input tab FILE in embl format", action="store", type="string", dest="tab")
    parser.add_option("-e", "--embl", metavar="FILE", help="input embl FILE with CDS features in embl format", action="store", type="string", dest="embl")
    parser.add_option("--genedb", help="extract reference genome protein sequences from geneDB", action="store_true", dest="db")
    parser.add_option("--fasta", help="run fasta against each extracted in-house genomes", action="store_true", dest="fasta")
    parser.add_option("--hamap", help="run pfscan against HAMAP profiles", action="store_true", dest="hamap")
    parser.add_option("--clean", help="delete all results without deleting reference genomes", action="store_true", dest="clean")
    parser.add_option("--deepclean", help="delete all reference genomes and results", action="store_true", dest="deepclean")
    return parser


def _prepareData(options):
    """Check required software and prepare the new genome, reference genomes and bsub directory."""
    # List of needed software
    for softname in soft_lists:
        util.checkSoft(softname)
    # Prepare new genome data
    if options.dna and options.tab and not options.embl:
        # dna + tab given: build the EMBL file with CDS features first
        util.checkFile(options.dna)
        mygenome_emblfile = fasta2embl(options.dna)
        mygenome_emblfile_withcds = concatFeatures(mygenome_emblfile, options.tab)
        splitSeq(mygenome_dir, mygenome_emblfile_withcds, "CDS")
        translateSeq(mygenome_dir)
    elif not options.dna and not options.tab and options.embl:
        # NOTE: splitSeqWithBiopython() is not used here because it does not
        # work with testdata_01.
        splitSeq(mygenome_dir, options.embl, "CDS")
        translateSeq(mygenome_dir)
    elif not options.deepclean:
        # No new input: the genome directory must already exist
        util.checkDir(mygenome_dir)
    # Extract in house genomes from chado db
    if options.db:
        chadoDump(refgenomes_dir)
    elif not options.deepclean:
        util.checkDir(refgenomes_dir)
    # bsub output directory (not needed when only cleaning)
    if IS_LSF and not (options.clean or options.deepclean):
        util.createDir(bsub_dir)


def _searchOrthologs():
    """Run fasta and reciprocal fasta, log the hits and transfer ortholog features."""
    runFasta(mygenome_dir, refgenomes_dir, fasta_dir)
    fasta_hits = topFastaHits(fasta_dir, refgenomes_extractedseq_dir)
    concatSeq(mygenome_fastafile_allcds, mygenome_dir)
    runReciprocalFasta(refgenomes_extractedseq_dir, mygenome_fastafile_allcds, reciprocalfasta_dir)
    reciprocalfasta_hits = topReciprocalFastaHits(reciprocalfasta_dir)
    printMSPCrunch(fasta_hits, reciprocalfasta_hits)
    hits = getHits(fasta_hits, reciprocalfasta_hits)
    logger.info("ORTHOLOGS")
    logger.info(hits['ortholog'])
    logger.info("SIMILARITY")
    logger.info(hits['similarity'])
    transferFeatures(hits['ortholog'])


def _removeSearchResults():
    """Delete the fasta, reciprocal fasta and hamap result files/directories."""
    # fasta results
    util.rmDir(fasta_dir)
    util.rmDir(reciprocalfasta_dir)
    util.rmDir(refgenomes_extractedseq_dir)
    util.rmFile(mygenome_fastafile_allcds)
    # hamap results
    util.rmDir(hamap_dir)


def _cleanOutputs(options):
    """Apply --clean and/or --deepclean; both run if both flags are given."""
    # Clean results before a re-run
    if options.clean:
        _removeSearchResults()
        # bsub outputs
        if IS_LSF:
            util.rmDir(bsub_dir)
    # Deep clean - remove the genomes as well as all results
    if options.deepclean:
        util.rmDir(refgenomes_dir)
        util.rmDir(mygenome_dir)
        _removeSearchResults()


def main():
    """
    Command-line entry point of the pipeline.

    Parses the options, prints the help and exits when util.printHelp()
    says so, then runs in order: data preparation, the optional ortholog
    search (--fasta), the optional HAMAP scan (--hamap) and the optional
    cleaning steps (--clean / --deepclean).
    """
    parser = _buildOptionParser()
    (options, args) = parser.parse_args()
    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()
    # Log the command line (each argument followed by one space)
    logger.debug("$ python " + "".join(arg + " " for arg in sys.argv))

    # >>> DATA PREPARATION
    _prepareData(options)

    # >>> ORTHOLOG SEARCH
    # Run fasta & reciprocal fasta
    if options.fasta:
        _searchOrthologs()
    # Run hamap scan
    if options.hamap:
        runHamapScan(mygenome_dir, hamap_dir)

    # >>> CLEANING OUTPUT DATA
    _cleanOutputs(options)
Beispiel #11
0
# Run every query, time its execution, and relabel the result columns with
# the projection names obtained by parsing the query text.
# `i` is the first row label given to each result frame; it is not advanced
# in the visible part of the loop.
i = 0
# Not used in the visible part of the loop.
qdf = None
# Number of queries whose result was empty.
j = 0
# Accumulated time spent inside cur.execute() only (fetching is not timed).
tot_query_answering_time = 0
start = time.time()
for qname, q in queries:
    logger.info("Query :\n{}\n".format(q))
    # Execute the query and obtain its result
    start_query = time.time()
    cur.execute(q)
    tot_query_answering_time += (time.time() - start_query)
    res = cur.fetchall()
    res_df = pd.DataFrame(res)
    # Label the rows i .. i + n_rows - 1
    res_df = res_df.set_index(np.arange(i, i + res_df.shape[0]))
    if res_df.empty:
        # Nothing to vectorize for this query: count it and move on
        logger.debug("Query is empty")
        j += 1
        continue
    pr = Parser()
    qv = QueryVectorizer(set(df['column_name'].tolist()))
    #Begin parsing the query and vectorizing its parameters
    pr.parse(q)
    dict_obj = pr.get_vector()
    proj_list = pr.get_projections()
    logger.debug("List of Projections : \n {}".format(proj_list))
    # Map a result column to a projection whenever the projection's first
    # '_'-separated token is contained in the column label; when several
    # projections match the same column, the last one in proj_list wins.
    # NOTE(review): the `in` test presumably expects string column labels --
    # confirm what the cursor returns.
    rename_names = {
        key: value
        for key in res_df.columns for value in proj_list
        if value.split('_')[0] in key
    }
    res_df = res_df.rename(columns=rename_names)