def transferFeatures(hits):
    """
    Log transferable annotation features, in EMBL qualifier style, for each
    ortholog hit by querying the geneDB chado database.

    In table: feature_cvterm
        RILEY           -> /class
        genedb_products -> /product
    In table: featureprop
        EC_number -> /EC_number
        colour    -> /colour
        gene      -> /gene

    hits -- mapping of query feature name to the ortholog feature_id
            used as the SQL query parameter.
    """
    # Connect to geneDB as read only user using ropy.query
    query = ropy.query.QueryProcessor(connection=connectionFactory)
    query.setSQLFilePath(os.path.join(os.path.dirname(__file__), "sql/"))
    # Register both SQL queries once, outside the loop — they are invariant
    # and are simply re-run with a different parameter for every hit.
    query.addQueryFromFile("feature_cvterm_query", "get_cvterm_from_feature_cvterm.sql")
    query.addQueryFromFile("featureprop_query", "get_cvterm_from_featureprop.sql")
    for hit, feature_id in hits.items():
        # Extract all cvterms related to a feature_id from the feature_cvterm table
        feature_cvterm_rows = query.runQuery("feature_cvterm_query", (feature_id,))
        logger.debug("--- %s" % hit)
        logger.debug('/ortholog="%s"' % feature_id)
        for row in feature_cvterm_rows:
            cvterm_name = row[0]
            cv_name = row[1]
            if cv_name == "RILEY":
                logger.debug('/class="%s"' % (cvterm_name))
            elif cv_name == "genedb_products":
                logger.debug('/product="%s"' % (cvterm_name))
        # Extract all cvterms related to a feature_id from featureprop
        featureprop_rows = query.runQuery("featureprop_query", (feature_id,))
        for row in featureprop_rows:
            logger.debug('/%s="%s"' % (row[0], row[1]))
    logger.info("Features transferred")
async def on_ready():
    """Log the connected guilds/user and start the chat monitor loop."""
    logger.debug(client.guilds)
    logger.info(f"{client.user} has connected to Discord!")
    # The Discord client is only usable now, so rebind every video's post
    # callback to a sender targeting its configured channel id.
    for video in chats.videos:
        video.send = discord_notify(int(video.chid))
    await chats.main()
def __init__(self, state=False, state_file="./state", **kwargs): self.videos = [] # save the list of videos id into files self.state = state self.state_file = state_file if state: self.load_state(**kwargs) logger.debug(f"State will save to {self.state_file} while checking")
async def on_message(message):
    """Handle a `.synchat` command message from Discord.

    Subcommands: list (show monitored ids for this channel),
    start <id> (begin syncing a video chat), stop <id> (stop syncing).
    Anything that is not a `.synchat` command, or comes from the bot
    itself, is ignored.
    """
    # Only read commands; exclude the bot itself
    if message.author == client.user:
        return
    if not message.content.startswith(".synchat"):
        return
    # No arguments given: show usage
    if not message.content.startswith(".synchat "):
        await message.channel.send("```" + parser.format_help() + "```")
        return
    # Read command and video id
    logger.debug(message.content)
    try:
        args = parser.parse_args(message.content.split()[1:])
    except BaseException as e:
        # Deliberately broad: before Python 3.9's exit_on_error=False,
        # argparse raises SystemExit (not an Exception) on bad input.
        logger.warning(str(type(e)) + str(e))
        await message.channel.send("```" + parser.format_help() + "```")
        return
    # Renamed from `id` to avoid shadowing the builtin
    method, video_id = args.method, args.id
    dc_channel = message.channel.id
    # List the videos monitored for this channel
    if method == "list":
        ids = [v.ytid for v in chats.videos if v.chid == str(dc_channel)]
        await message.channel.send("sync list: " + ",".join(ids))
        return
    # id cannot be null if the user wants to start or stop a chat
    if video_id is None:
        await message.channel.send("Fail: No video ID provided")
        return
    # Start to monitor
    if method == "start":
        logger.info(f"Sync {video_id} to {dc_channel}")
        if chats.add_video(video_id, dc_channel, discord_notify(dc_channel),
                           save=True, chat_folder=chat_folder):
            await message.channel.send(f"OK {video_id}")
        else:
            await message.channel.send(f"Fail to add {video_id}")
    # Stop monitoring
    elif method == "stop":
        ok = await chats.remove_video(video_id, dc_channel)
        if ok:
            await message.channel.send("OK")
        else:
            await message.channel.send(f"No {video_id} found")
    else:
        await message.channel.send(f"{method} not implemented")
async def post(self, chatdata):
    """Forward incoming chat items to the configured sender.

    Optionally archives each raw item as a JSON line, then advances the
    chat stream when it is live.
    """
    for item in chatdata.items:
        # Archive the raw chat item, one JSON document per line
        if self.save:
            with open(self.folder + self.id + ".data", "a") as fp:
                fp.write(item.json() + "\n")
        # Forward everything except plain text messages, unless normal
        # messages were explicitly enabled (normal_msg).
        if self.normal_msg or item.type != "textMessage":
            logger.debug("post")
            await self.send(item)
    if self.live:
        await chatdata.tick_async()
async def remove_video(self, id, channel=""):
    """Stop monitoring a video and drop it from the list.

    When a channel is given the lookup key becomes "<channel>.<id>".
    Returns True when a matching video was closed and removed,
    False when nothing matched.
    """
    if channel:
        id = str(channel) + "." + id
    kept = []
    removed_any = False
    for chat in self.videos:
        if chat.id != id:
            kept.append(chat)
            continue
        logger.debug(f"Remove {chat.id}")
        await chat.close()
        removed_any = True
    if not removed_any:
        # Nothing matched: leave self.videos untouched
        return False
    self.videos = kept
    return True
def splitSeqWithBiopython(embl, type):
    """
    Split sequence into separate records, based on features of the requested
    type, into the sequences/ directory using Biopython.

    embl -- path to an EMBL format file containing annotated features
    type -- feature type to extract (the caller passes "CDS"); previously
            this argument was accepted but ignored in favour of a
            hard-coded 'CDS'.
    """
    util.checkFile(embl)
    # Create directory sequences/
    dirname = "sequences/"
    util.createDir(dirname)
    # Use a context manager so the handle is closed instead of leaked
    with open(embl, "rU") as handle:
        record = SeqIO.read(handle, "embl")
    if len(record.features) == 0:
        sys.exit("ERROR: EMBL file %s without features" % embl)
    for feature in record.features:
        if feature.type == type:
            seq = record.seq
            # Build up a list of (start,end) tuples that will be used to slice the sequence
            locations = []
            if len(feature.sub_features):
                # If there are sub_features, this gene is made up of multiple parts
                for sf in feature.sub_features:
                    locations.append((sf.location.start.position, sf.location.end.position))
            else:
                # This gene is made up of one part. Store its start and end position.
                locations.append((feature.location.start.position, feature.location.end.position))
            # Store the joined sequence and nucleotide indices forming the CDS.
            seq_str = ''
            for begin, end in locations:
                seq_str += seq[begin:end].tostring()
            # Reverse complement the sequence if the CDS is on the minus strand
            if feature.strand == -1:
                seq_obj = Seq(seq_str, IUPAC.ambiguous_dna)
                seq_str = seq_obj.reverse_complement().tostring()
            logger.debug(feature)
            logger.debug(SeqRecord(seq=Seq(seq_str), id=feature.qualifiers['systematic_id'][0], description=feature.type).format('fasta'))
    logger.info("Sequences extracted into %s" % dirname)
async def send(c):
    """Relay a chat item (or a raw string) to the bound Discord channel
    as a rich embed carrying author, colour, text and timestamp.
    """
    # Plain strings are sent verbatim. `type(...) is str` (not isinstance)
    # is kept so str subclasses still take the embed path, as before.
    if type(c) is str:
        await client.get_channel(channel).send(c)
        return
    logger.debug(f"Print data: {str(c.json())}")
    # If new member: message = join message
    if c.amountString:
        # Paid message: prefix the amount
        text = f"[{c.amountString}]\n{c.message}"
    elif c.type != "textMessage":
        text = f"[{c.message}]"
    else:
        text = f"{c.message}"
    # Timestamp arrives in milliseconds since the epoch
    dtime = datetime.utcfromtimestamp(c.timestamp / 1000)
    # Name, colour (ARGB masked down to RGB) and time
    embed = discord.Embed(
        title=c.author.name,
        colour=c.bgColor % 0x1000000,
        description=text,
        timestamp=dtime,
    )
    # Thumbnail: the author's avatar
    embed.set_thumbnail(url=c.author.imageUrl)
    await client.get_channel(channel).send(embed=embed)
async def console_print(c):
    """Fallback sender that only logs the chat item to the console."""
    if type(c) is str:
        logger.info(c)
        return
    logger.debug(f"Print data: {str(c.json())}")
def main():
    """Command-line entry point for the ortholog annotation pipeline.

    Prepares the query genome (EMBL conversion, CDS extraction,
    translation), optionally dumps reference genomes from chado,
    runs fasta / reciprocal fasta ortholog searches and HAMAP scans,
    and supports two levels of output cleaning.
    """
    # Fasta file extension:
    # .ffn for the untranslated nucleotide sequences for each CDS; .faa for protein coding sequences (CDS)
    # .fa for the fasta alignment results
    # .fna for whole genomic DNA sequences; .frn for nucleotide sequences of RNA related features
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--dna", metavar="FILE", help="input dna FILE in fasta format", action="store", type="string", dest="dna")
    parser.add_option("-t", "--tab", metavar="FILE", help="input tab FILE in embl format", action="store", type="string", dest="tab")
    parser.add_option("-e", "--embl", metavar="FILE", help="input embl FILE with CDS features in embl format", action="store", type="string", dest="embl")
    parser.add_option("--genedb", help="extract reference genome protein sequences from geneDB", action="store_true", dest="db")
    parser.add_option("--fasta", help="run fasta against each extracted in-house genomes", action="store_true", dest="fasta")
    parser.add_option("--hamap", help="run pfscan against HAMAP profiles", action="store_true", dest="hamap")
    parser.add_option("--clean", help="delete all results without deleting reference genomes", action="store_true", dest="clean")
    parser.add_option("--deepclean", help="delete all reference genomes and results", action="store_true", dest="deepclean")
    (options, args) = parser.parse_args()
    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()
    # Print command line (reconstructed for the log)
    cmdline = "$ python "
    for argv in sys.argv:
        cmdline += argv + " "
    logger.debug(cmdline)
    # >>> ---------------------------------------------------------------------
    # >>> DATA PREPARATION
    # >>> ---------------------------------------------------------------------
    # List of needed software
    for softname in soft_lists:
        util.checkSoft(softname)
    # Prepare new genome data: either a dna fasta + feature tab pair...
    if options.dna and options.tab and not options.embl:
        util.checkFile(options.dna)
        mygenome_emblfile = fasta2embl(options.dna)
        mygenome_emblfile_withcds = concatFeatures(mygenome_emblfile, options.tab)
        splitSeq(mygenome_dir, mygenome_emblfile_withcds, "CDS")
        translateSeq(mygenome_dir)
    # ...or a ready-made EMBL file that already carries CDS features
    elif not options.dna and not options.tab and options.embl:
        mygenome_emblfile_withcds = options.embl
        splitSeq(mygenome_dir, mygenome_emblfile_withcds, "CDS")
        #splitSeqWithBiopython(mygenome_emblfile_withcds, "CDS") # does not work with testdata_01
        translateSeq(mygenome_dir)
    # Otherwise the genome directory must already exist (unless deep-cleaning)
    elif not options.deepclean:
        util.checkDir(mygenome_dir)
    # Extract in house genomes from chado db
    if options.db:
        chadoDump(refgenomes_dir)
    elif not options.deepclean:
        util.checkDir(refgenomes_dir)
    # bsub output directory (LSF cluster runs only)
    if IS_LSF and not (options.clean or options.deepclean):
        util.createDir(bsub_dir)
    # >>> ---------------------------------------------------------------------
    # >>> ORTHOLOG SEARCH
    # >>> ---------------------------------------------------------------------
    # Run fasta & reciprocal fasta, then keep only the mutual best hits
    if options.fasta:
        runFasta(mygenome_dir, refgenomes_dir, fasta_dir)
        fasta_hits = topFastaHits(fasta_dir, refgenomes_extractedseq_dir)
        concatSeq(mygenome_fastafile_allcds, mygenome_dir)
        runReciprocalFasta(refgenomes_extractedseq_dir, mygenome_fastafile_allcds, reciprocalfasta_dir)
        reciprocalfasta_hits = topReciprocalFastaHits(reciprocalfasta_dir)
        printMSPCrunch(fasta_hits, reciprocalfasta_hits)
        hits = getHits(fasta_hits, reciprocalfasta_hits)
        logger.info("ORTHOLOGS")
        logger.info(hits['ortholog'])
        logger.info("SIMILARITY")
        logger.info(hits['similarity'])
        transferFeatures(hits['ortholog'])
    # Run hamap scan
    if options.hamap:
        runHamapScan(mygenome_dir, hamap_dir)
    # >>> ---------------------------------------------------------------------
    # >>> CLEANING OUTPUT DATA
    # >>> ---------------------------------------------------------------------
    # Clean results before a re-run (keeps reference genomes)
    if options.clean:
        # fasta results
        util.rmDir(fasta_dir)
        util.rmDir(reciprocalfasta_dir)
        util.rmDir(refgenomes_extractedseq_dir)
        util.rmFile(mygenome_fastafile_allcds)
        # hamap results
        util.rmDir(hamap_dir)
        # bsub outputs
        if IS_LSF:
            util.rmDir(bsub_dir)
    # Deep clean - remove all, including reference genomes
    if options.deepclean:
        util.rmDir(refgenomes_dir)
        util.rmDir(mygenome_dir)
        util.rmDir(fasta_dir)
        util.rmDir(reciprocalfasta_dir)
        util.rmDir(refgenomes_extractedseq_dir)
        util.rmFile(mygenome_fastafile_allcds)
        util.rmDir(hamap_dir)
# Benchmark loop: execute each named query, time it, and vectorize/parse it.
# NOTE(review): this fragment appears to continue beyond the visible chunk;
# `cur`, `df`, `queries`, `Parser` and `QueryVectorizer` are defined elsewhere.
i = 0          # running row-index offset used to index result frames
qdf = None
j = 0          # count of queries that returned no rows
tot_query_answering_time = 0
start = time.time()
for qname, q in queries:
    logger.info("Query :\n{}\n".format(q))
    #### Execute query and obtain result
    start_query = time.time()
    cur.execute(q)
    # Accumulate pure query-answering time (excludes fetch/parse below)
    tot_query_answering_time += (time.time() - start_query)
    res = cur.fetchall()
    res_df = pd.DataFrame(res)
    # Re-index so successive result frames get contiguous row labels
    res_df = res_df.set_index(np.arange(i, i + res_df.shape[0]))
    if res_df.empty:
        logger.debug("Query is empty")
        j += 1
        continue
    pr = Parser()
    qv = QueryVectorizer(set(df['column_name'].tolist()))
    # Begin parsing the query and vectorizing its parameters
    pr.parse(q)
    dict_obj = pr.get_vector()
    proj_list = pr.get_projections()
    logger.debug("List of Projections : \n {}".format(proj_list))
    # Map result columns to projection names when the projection's prefix
    # (text before the first '_') occurs in the column name.
    rename_names = {
        key: value
        for key in res_df.columns
        for value in proj_list
        if value.split('_')[0] in key
    }
    res_df = res_df.rename(columns=rename_names)