def setTaskStatus(self, taskState, taskId):
    try:
        connection = DB_Manager.getConnection()
        cursor = connection.cursor()
        # Use a parameterized query so the string value is quoted correctly
        # (the concatenated version left taskState unquoted) and to avoid SQL injection.
        sql = "UPDATE task SET taskState = %s WHERE taskId = %s"
        print(sql)
        cursor.execute(sql, (taskState, taskId))
        connection.commit()
        if cursor.rowcount:
            print(cursor.rowcount, "record updated.")
            logging.info("1 record updated.")
        else:
            print('no record updated')
            logging.warning('no record updated')
        return
    except Exception as e:
        logging.exception(e)
        logging.error('Error occurred while setTaskStatus into task')
        raise
    finally:
        DB_Manager.closeConnection()
def add(self, task):
    try:
        connection = DB_Manager.getConnection()
        cursor = connection.cursor()
        sql = ("INSERT INTO task (taskName, taskParameter, taskPath, taskState, "
               "userID, assignedNodeId) VALUES (%s, %s, %s, %s, %s, %s)")
        # assignedNodeId is a foreign key to nodeId; userID is a foreign key to clientId.
        val = (task._taskName, task._taskParameters, task._taskExePath,
               task._taskState, task._taskUserId, None)
        cursor.execute(sql, val)
        connection.commit()
        # lastrowid is the public attribute; _last_insert_id is private to the driver.
        if cursor.lastrowid:
            logging.info(str(cursor.rowcount) + " task inserted.")
            return cursor.lastrowid
        logging.error('Task is not added to DB')
        return None
    except Exception as e:
        logging.exception(e)
        raise
    finally:
        DB_Manager.closeConnection()
def search_nested_dict_bfs(root, item2search, saved_dict=None):
    """Breadth-first search a nested dict/list structure for the keys in item2search."""
    queue = [root]
    found_item = set()
    if saved_dict is None:
        saved_dict = {}
    while queue and len(found_item) < len(item2search):
        node = queue.pop(0)  # pop from the front so the traversal stays breadth-first
        if isinstance(node, dict):
            for k, v in node.items():
                if k in item2search:
                    if k in saved_dict:
                        logging.warning('overlapped feature names')
                    else:
                        saved_dict[k] = v
                    found_item.add(k)
                    if len(found_item) == len(item2search):
                        return saved_dict
                elif isinstance(v, dict):
                    queue.append(v)
        elif isinstance(node, list):
            for i in node:
                queue.append(i)
    # Keys that were never found are reported with a None value.
    not_found_item = [i for i in item2search if i not in found_item]
    for item in not_found_item:
        saved_dict[item] = None
    return saved_dict
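# A minimal usage sketch of search_nested_dict_bfs; the sample dict and key
# names below are hypothetical, chosen only to illustrate the return shape.
sample = {"model": {"lr": 0.01, "layers": {"hidden": 128}}, "tags": ["a", "b"]}
print(search_nested_dict_bfs(sample, ["lr", "hidden", "missing"]))
# -> {'lr': 0.01, 'hidden': 128, 'missing': None}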
def get_dairy_path(config, start_date, end_date, filetype='.csv'):
    """input: date ranges for dairy data
    output: return all file paths in that range"""
    print('get_dairy_path')
    folder = os.path.join(os.getcwd(), config['data_folder'])
    result_path = []
    if isinstance(start_date, str):
        start_date = datetime.datetime.strptime(start_date, '%Y%m%d')
    if isinstance(end_date, str):
        end_date = datetime.datetime.strptime(end_date, '%Y%m%d')
    while start_date <= end_date:
        # Daily data lives in one sub-folder per date, named YYYYMMDD.
        this_folder = os.path.join(
            folder, datetime.datetime.strftime(start_date, '%Y%m%d'))
        if os.path.isdir(this_folder):
            for r, d, fn in walk(this_folder):
                result_path.extend(
                    [os.path.join(r, f) for f in fn if f.endswith(filetype)])
        start_date += datetime.timedelta(days=1)
    if len(result_path) < 1:
        logging.warning('No dairy retrieved.')
    return result_path
def sendmail(spiderName, stats, logPath):
    FROM = os.getenv('email.from')
    TO = os.getenv('email.to')
    passwd = os.getenv('email.passwd')
    if not FROM or not TO or not passwd:
        logging.warning('Mail sending cancelled: please set the related environment variables')
        return
    # Build the mail body
    msg = EmailMessage()
    msg['Subject'] = f'Spider - {spiderName} - crawl report'
    msg['From'] = FROM
    msg['To'] = TO
    msg.set_content('\n'.join([f'{k}: {v}' for k, v in stats.items()]))
    # Attach the log file
    if os.path.exists(logPath):
        with open(logPath, 'rb') as fp:
            msg.add_attachment(
                fp.read(),
                maintype='application',
                subtype='octet-stream',
                filename=os.path.basename(logPath),
            )
    # Send the mail
    with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=5) as client:
        client.ehlo()
        client.login(FROM, passwd)
        failList = client.sendmail(FROM, TO, msg.as_string())
        if failList:
            logging.warning(f'Could not send mail to the following recipients: {failList}')
        else:
            logging.info('Mail sent successfully')
def get_dairy_path(config, start_date, end_date):
    """input: date ranges for dairy data
    output: return all file paths in that range"""
    folder = config["data"]["folder"]
    result_path = []
    if isinstance(start_date, str):
        start_date = datetime.datetime.strptime(start_date, '%Y%m%d')
    if isinstance(end_date, str):
        end_date = datetime.datetime.strptime(end_date, '%Y%m%d')
    while start_date <= end_date:
        this_folder = os.path.join(
            folder, datetime.datetime.strftime(start_date, '%Y%m%d'))
        if os.path.isdir(this_folder):
            for r, d, fn in walk(this_folder):
                result_path.extend([os.path.join(r, f) for f in fn])
        start_date += datetime.timedelta(days=1)
    if len(result_path) < 1:
        logging.warning('No dairy retrieved.')
    return result_path
def process_negations(self, doc):
    """
    Find negations in doc and clean candidate negations to remove pseudo negations

    Parameters
    ----------
    doc: object
        spaCy Doc object

    Returns
    -------
    preceding: list
        list of tuples for preceding negations
    following: list
        list of tuples for following negations
    terminating: list
        list of tuples of terminating phrases
    """
    # Does not work properly in spaCy 2.1.8. Will incorporate after 2.2.
    # Relying on the user to run NER in the meantime.
    # See https://github.com/jenojp/negspacy/issues/7
    # if not doc.is_nered:
    #     raise ValueError(
    #         "Negations are evaluated for Named Entities found in text. "
    #         "Your SpaCy pipeline does not include Named Entity resolution. "
    #         "Please ensure it is enabled or choose a different language model that includes it."
    #     )
    preceding = list()
    following = list()
    terminating = list()
    matches = self.matcher(doc)
    pseudo = [(match_id, start, end) for match_id, start, end in matches
              if match_id == self.keys[0]]
    for match_id, start, end in matches:
        if match_id == self.keys[0]:
            continue
        pseudo_flag = False
        for p in pseudo:
            if start >= p[1] and start <= p[2]:
                pseudo_flag = True
                continue
        if not pseudo_flag:
            if match_id == self.keys[1]:
                preceding.append((match_id, start, end))
            elif match_id == self.keys[2]:
                following.append((match_id, start, end))
            elif match_id == self.keys[3]:
                terminating.append((match_id, start, end))
            else:
                logging.warning(
                    f"phrase {doc[start:end].text} not in one of the expected matcher types."
                )
    return preceding, following, terminating
def getCDR(cdrfile):
    """Read CDR boundary definitions into a dict keyed by the first column."""
    V_CDR = {}
    if not os.path.exists(cdrfile):
        logging.warning('Cannot find CDR boundary file %s' %
                        os.path.basename(cdrfile))
        return None
    with open(cdrfile) as handle:
        for line in handle:
            l = line.strip().split()
            V_CDR[l[0]] = [int(b) for b in l[1:]]
    return V_CDR
async def __call__(self, request):
    kw = None
    if self._has_var_kw_arg or self._has_named_kw_args or self._required_kw_args:
        if request.method == 'POST':
            if not request.content_type:
                return web.HTTPBadRequest(text='Missing Content-Type.')
            ct = request.content_type.lower()
            if ct.startswith('application/json'):
                params = await request.json()
                if not isinstance(params, dict):
                    return web.HTTPBadRequest(text='JSON body must be object.')
                kw = params
            elif ct.startswith('application/x-www-form-urlencoded') or ct.startswith('multipart/form-data'):
                params = await request.post()
                kw = dict(**params)
            else:
                return web.HTTPBadRequest(text='Unsupported Content-Type: %s' % request.content_type)
        if request.method == 'GET':
            qs = request.query_string
            if qs:
                kw = dict()
                for k, v in parse.parse_qs(qs, True).items():
                    kw[k] = v[0]
    if kw is None:
        kw = dict(**request.match_info)
    else:
        if not self._has_var_kw_arg and self._named_kw_args:
            # remove all unnamed kw:
            copy = dict()
            for name in self._named_kw_args:
                if name in kw:
                    copy[name] = kw[name]
            kw = copy
        # check named args:
        for k, v in request.match_info.items():
            if k in kw:
                logging.warning('Duplicate arg name in named arg and kw args: %s' % k)
            kw[k] = v
    if self._has_request_arg:
        kw['request'] = request
    # check required kw:
    if self._required_kw_args:
        for name in self._required_kw_args:
            if name not in kw:
                return web.HTTPBadRequest(text='Missing argument: %s' % name)
    logging.info('call with args: %s' % str(kw))
    try:
        r = await self._func(**kw)
        return r
    except APIError as e:
        return dict(error=e.error, data=e.data, message=e.message)
def get_es_url(query_type):
    '''Returns the Elasticsearch URL required for running queries.'''
    settings = get_app_settings('es_settings')
    if settings:
        es_url = "http://%s:%s/%s/%s/" % (settings['es_host'],
                                          settings['es_port'],
                                          settings['es_search_index'],
                                          settings['es_search_type'])
        if query_type:
            es_url += query_type
        return es_url
    else:
        logging.warning('No settings found for elasticsearch')
        return None
def executeModule(self, module, tag, inputs):
    if not module:
        logging.warning('publish build not valid')
        return
    if not self.type:
        logging.warning('publish build type not valid')
        return
    try:
        result, data, message = module.testRun(tag, inputs)
    except Exception as except_error:
        result, data, message = 'runtime error', [], str(except_error)
    value = self.bundle_value[result][1]
    color = self.bundle_value[result][0]
    return result, value, color, data, message
def process_negations(self, doc):
    """
    Find negations in doc and clean candidate negations to remove pseudo negations

    Parameters
    ----------
    doc: object
        spaCy Doc object

    Returns
    -------
    preceding: list
        list of tuples for preceding negations
    following: list
        list of tuples for following negations
    terminating: list
        list of tuples of terminating phrases
    """
    # if not doc.is_nered:
    #     raise ValueError(
    #         "Negations are evaluated for Named Entities found in text. "
    #         "Your SpaCy pipeline does not include Named Entity resolution."
    #     )
    preceding = list()
    following = list()
    terminating = list()
    matches = self.matcher(doc)
    pseudo = [(match_id, start, end) for match_id, start, end in matches
              if self.nlp.vocab.strings[match_id] == "pseudo"]
    for match_id, start, end in matches:
        if self.nlp.vocab.strings[match_id] == "pseudo":
            continue
        pseudo_flag = False
        for p in pseudo:
            if start >= p[1] and start <= p[2]:
                pseudo_flag = True
                continue
        if not pseudo_flag:
            if self.nlp.vocab.strings[match_id] == "Preceding":
                preceding.append((match_id, start, end))
            elif self.nlp.vocab.strings[match_id] == "Following":
                following.append((match_id, start, end))
            elif self.nlp.vocab.strings[match_id] == "Termination":
                terminating.append((match_id, start, end))
            else:
                logging.warning(
                    f"phrase {doc[start:end].text} not in one of the expected matcher types."
                )
    return preceding, following, terminating
def process_negations(self, doc):
    """
    Find negations in doc and clean candidate negations to remove pseudo negations

    Parameters
    ----------
    doc: object
        spaCy Doc object

    Returns
    -------
    preceding: list
        list of tuples for preceding negations
    following: list
        list of tuples for following negations
    terminating: list
        list of tuples of terminating phrases
    """
    preceding = list()
    following = list()
    terminating = list()
    matches = self.matcher(doc)
    pseudo = [(match_id, start, end) for match_id, start, end in matches
              if match_id == self.keys[0]]
    for match_id, start, end in matches:
        if match_id == self.keys[0]:
            continue
        pseudo_flag = False
        for p in pseudo:
            if start >= p[1] and start <= p[2]:
                pseudo_flag = True
                continue
        if not pseudo_flag:
            if match_id == self.keys[1]:
                preceding.append((match_id, start, end))
            elif match_id == self.keys[2]:
                following.append((match_id, start, end))
            elif match_id == self.keys[3]:
                terminating.append((match_id, start, end))
            else:
                logging.warning(
                    f"phrase {doc[start:end].text} not in one of the expected matcher types."
                )
    return preceding, following, terminating
async def extend(message, event):
    """`async def extend(message, event) -> None`: Periodically extend the
    message's acknowledgement deadline.

    Args:
    -----
        message (Message): consumed event message to extend.
        event (asyncio.Event): event to watch for message extension or cleaning up.
    """
    timeout_count = 60
    while not event.is_set():
        try:
            assert timeout_count > 0
            message.extended_cnt += 1
            logging.info(f"Extension: extended the deadline by 1.5 seconds for {message}")
            timeout_count -= 1
            await asyncio.sleep(1.5)
        except AssertionError as err:
            logging.warning("Extension: Grace period has elapsed. Shutting down.")
            raise ExtensionError(err)
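# A hypothetical usage sketch of extend(): run it alongside the real message
# handling and set the event once handling finishes. process() and the message
# object are assumed names, not part of the original code.
async def handle_message(message):
    event = asyncio.Event()
    extender = asyncio.create_task(extend(message, event))
    try:
        await process(message)   # assumed application-specific handler
    finally:
        event.set()              # stop extending once processing is done
        await extender           # re-raises ExtensionError if the grace period elapsed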
def extract(self, base, output):
    valid_shema = super().get_valid_shema(base)
    strains = base.get_all_strains()
    mlst = base.get_mlst(valid_shema)
    # Build one row per gene, one column per strain.
    table = pd.DataFrame(columns=["#GeneId"] + strains)
    for gene in valid_shema:
        row = {"#GeneId": gene}
        mlstg = mlst.get(gene, {})
        for strain in strains:
            row[strain] = mlstg.get(strain, None)
        table = table.append(row, ignore_index=True)
    table = table.set_index('#GeneId')
    if self.form == 'grapetree':
        if self.duplicate:
            logging.warning("Exporting a grapetree table "
                            "using duplicate genes is not recommended.")
        table = table.fillna(-1)
        table = table.transpose()
    else:
        table = table.fillna("")
    table.to_csv(output, sep='\t')
def PCA_mag(dapall, filter_obs, plateifu=None, filename=None, vec_file=None,
            vec_data=None, pca_data_dir=None):
    """
    Return absolute AB Magnitude in filter provided

    Parameters
    ----------
    dapall: 'Table', 'dict'
        DAPALL file data
    filter_obs: 'str', 'speclite.filters.FilterSequence'
        observational filter to use
    plateifu: 'str', list, optional, must be keyword
        plate-ifu of galaxy desired
    filename: 'str', optional, must be keyword
        pca data file to read in, ignores plateifu if provided
    vec_file: 'str', optional, must be keyword
        pca data file containing eigenvectors
    vec_data: 'tuple', optional, must be keyword
        eigenvector data (mean_spec, evec_spec, lam_spec)
    """
    # Check plateifu
    if plateifu is None:
        plateifu = dapall["plateifu"]

    # Check filter status
    if filter_obs.__class__ is not filters.FilterSequence:
        if filter_obs in ["GALEX-NUV", "GALEX-FUV"]:
            # Register the GALEX filter responses with speclite before loading
            wav, resp = np.loadtxt(
                "{}/data/GALEX_GALEX.NUV.dat".format(directory)).T
            galex_nuv = filters.FilterResponse(
                wavelength=wav * u.Angstrom, response=resp,
                meta=dict(group_name='GALEX', band_name='NUV'))
            wav, resp = np.loadtxt(
                "{}/data/GALEX_GALEX.FUV.dat".format(directory)).T
            galex_fuv = filters.FilterResponse(
                wavelength=wav * u.Angstrom, response=resp,
                meta=dict(group_name='GALEX', band_name='FUV'))
        try:
            filter_obs = filters.load_filters(filter_obs)
        except ValueError:
            logging.warning("Invalid filter, using default of 'sdss2010-i'")
            filter_obs = filters.load_filters("sdss2010-i")

    if filename is None:
        if plateifu is None:
            raise ValueError("No input file or plateifu provided")
        filename = os.path.join(pca_data_dir, plateifu,
                                "{}_res.fits".format(plateifu))

    spectrum, wlen = PCA_Spectrum(plateifu=plateifu, filename=filename,
                                  vec_file=vec_file, vec_data=vec_data,
                                  pca_data_dir=pca_data_dir)

    mag = filter_obs.get_ab_magnitudes(
        spectrum, wlen, axis=0)[filter_obs.names[0]].data * u.ABmag
    mag_abs = mag - WMAP9.distmod(dapall["nsa_zdist"])
    return mag_abs
def search_read(self, fastqs, identity=0.9, coverage=0.95, reads=10,
                fasta=None):
    """Search the **Sequence Type** from raw reads of one strain.

    :param fastqs: List of fastq files containing raw reads
    :param identity: Sets the minimum identity used by `KMA`_
                     for sequences research (in percent).
    :param coverage: Sets the minimum accepted gene coverage for found sequences.
    :param reads: Sets the minimum read coverage to conserve a mapping.
    :param fasta: A file where to export gene allele results in fasta format.
    """
    if identity < 0 or identity > 1:
        raise exceptions.BadIdentityRange("Identity must be between 0 to 1")
    if coverage < 0 or coverage > 1:
        raise exceptions.BadCoverageRange('Coverage must be in range [0-1]')

    # indexing
    if kma.is_database_indexing(self.__file) is False:
        with kma.index_tmpfile() as tmpfile:
            self.__create_all_core_genome_file(tmpfile)
            tmpfile.flush()
            kma.index_database(self.__file, tmpfile)

    # run kma
    genome_name = os.path.basename(fastqs[0].name).split('.')[0]
    core_genome = self.__database.core_genome
    allele = {i: [] for i in core_genome}
    try:
        kma_res, seqs = kma.run_kma(fastqs, self.__file, identity, coverage,
                                    reads)
    except exceptions.CoreGenomePathNotFound:
        logging.warning("No gene found for the strain %s", genome_name)
        return ST_result(genome_name, [], allele)

    logging.info("Search allele gene to database")
    sequence_type = {i: set() for i in core_genome}
    for res in kma_res:
        logging.debug("Looking for kma result of gene %s", res)
        sequence = seqs.get(res)
        if sequence is None:
            raise PyMLSTError("%s not found in the fasta files", res)

        # test lowercase (uncertain) bases
        b = (sequence.count('a') + sequence.count('t')
             + sequence.count('c') + sequence.count('g'))
        if b != 0:
            logging.debug("%s Remove uncertain", res)
            continue

        # add sequence and MLST
        gene = res.split("_")[0]
        if gene not in core_genome:
            logging.warning("Gene %s not present in database", gene)
            continue

        alle = self.__database.get_allele_by_sequence_and_gene(str(sequence), gene)
        # write fasta file with coregene
        if fasta is not None:
            fasta.write(">" + genome_name + "|" + gene + "\n")
            fasta.write(str(sequence) + "\n")
        if alle is not None:
            allele.get(gene).append(str(alle))
            seq_types = self.__database.get_st_by_gene_and_allele(gene, alle)
            for seq_type in seq_types:
                sequence_type.get(gene).add(seq_type)
        else:
            # search substring sequence
            sequence_gene = self.__database.get_all_sequences_by_gene(gene)
            find = False
            for s in sequence_gene.keys():
                if s.find(str(sequence)) != -1:
                    find = True
                    alle = sequence_gene.get(s)
                    logging.debug("Find subsequence of gene %s with the allele %s",
                                  gene, str(alle))
                    allele.get(gene).append(str(alle))
                    seq_types = self.__database.get_st_by_gene_and_allele(gene, alle)
                    for seq_type in seq_types:
                        sequence_type.get(gene).add(seq_type)
            if find is False:
                allele.get(gene).append("new")

    st_val = self.__determined_ST(allele, sequence_type)
    return ST_result(genome_name, st_val, allele)
# 10: the logging module
import logging

# Configure parameters via logging.basicConfig
logging.basicConfig(
    level=logging.DEBUG,   # at DEBUG level, all five messages below are emitted
    filename="log.log",    # store logs in log.log (append mode by default)
    filemode="w",          # overwrite the file instead of appending
    format="%(asctime)s [%(lineno)d] %(message)s %(filename)s"  # time, line number, log message, and .py file name
)
logging.debug("debug message")
logging.info("info message")
# The default logging level is WARNING, so only WARNING and above are emitted
logging.warning("warning message")
logging.error("error message")
logging.critical("critical message")

# Logging through a logger object (this approach is used more often)
logger = logging.getLogger()          # first create a logger object
# Where the log goes: a file, the screen, or both
fh = logging.FileHandler("path/to/logfile")   # send logs to a file, not to the screen
ch = logging.StreamHandler()                  # send logs to the screen
# Set the log format
fm = logging.Formatter("%(asctime)s %(message)s")  # set the output format
fh.setFormatter(fm)
ch.setFormatter(fm)
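# The handler setup above stops before the handlers are attached; a minimal
# sketch of the remaining wiring, so records actually reach both destinations:
logger.addHandler(fh)            # write records to the file handler
logger.addHandler(ch)            # and echo them to the screen
logger.setLevel(logging.DEBUG)   # let DEBUG and above through the logger
logger.info("info message goes to both the file and the screen")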
def add_reads(self, fastqs, strain=None, identity=0.95, coverage=0.90,
              reads=10):
    """
    Adds raw reads of a strain to the database.

    How it works:

    1. A `KMA`_ research is performed on reads (fastq) of the strain
       to find sub-sequences matching the core genes.
    2. The identified sub-sequences are extracted and added to our database
       where they are associated to a **sequence ID**.
    3. An MLST entry is created, referencing the sequence, the gene it belongs
       to, and the strain it was found in.

    :param fastqs: The reads we want to add as a list of `fastq`_ files.
    :param strain: The name that will be given to the new strain in the database.
    :param identity: Sets the minimum identity used by `BWA`_
                     for sequences research (in percent).
    :param coverage: Sets the minimum accepted coverage for found sequences.
    :param reads: Sets the minimum read coverage required to conserve a result.
    """
    with self.database.begin():
        if identity < 0 or identity > 1:
            raise exceptions.BadIdentityRange('Identity must be in range [0-1]')
        if coverage < 0 or coverage > 1:
            raise exceptions.BadCoverageRange('Coverage must be in range [0-1]')

        # indexing
        if kma.is_database_indexing(self.__file) is False:
            with kma.index_tmpfile() as tmpfile:
                coregene = self.__create_core_genome_file(tmpfile)
                tmpfile.flush()
                kma.index_database(self.__file, tmpfile)

        # strain name
        name = strain
        if name is None:
            name = fastqs[0].name.split('/')[-1]
        self.__database.check_name(name)

        # run kma
        kma_res, seqs = kma.run_kma(fastqs, self.__file, identity, coverage,
                                    reads)
        core_genes = self.__database.core_genome
        valid = 0
        minus = 0
        frame = 0
        for res in kma_res:
            seq = seqs.get(res)
            if seq is None:
                raise PyMLSTError("%s not found in the fasta files", res)

            # test lowercase (uncertain) bases
            b = (seq.count('a') + seq.count('t') + seq.count('c')
                 + seq.count('g'))
            if b != 0:
                minus += 1
                logging.debug("%s Remove uncertain", res)
                continue

            # test CDS
            try:
                seq.translate(cds=True, table=11)
            except Exception:
                frame += 1
                logging.debug("%s Remove bad CDS", res)
                continue

            # add sequence and MLST
            gene = res.split("_")[0]
            if gene not in core_genes:
                logging.warning("Gene %s not present in database", gene)
                continue
            valid += 1
            self.__database.add_genome(gene, name, str(seq))

        logging.info("Add %s new MLST genes to database", str(valid))
        logging.info("Remove %s genes with uncertain bases", str(minus))
        logging.info("Remove %s genes with bad CDS", str(frame))
def pull_images_from_dropbox(self):
    # Dropbox source folder for images received from clients (later via app)
    DROPBOX_SOURCE_PATH = '/Photos_Before_Processing/'
    # local destination folder for raw images that will be processed on this machine
    RAW_IMAGES_PATH = self.PATH + 'PythonServer/before_images/'
    # the dropbox instance
    dbx = self.dbx
    failed_download_images = []
    entries_of_images_to_delete_from_dropbox = []
    if dbx is not None:
        logging.info('############ Attempting to pull images from Dropbox:')
        logging.info('source path: ' + DROPBOX_SOURCE_PATH)
        logging.info('destination path: ' + RAW_IMAGES_PATH)
        entries = dbx.files_list_folder(DROPBOX_SOURCE_PATH).entries
        num_of_files_in_dropbox = len(entries)
        if num_of_files_in_dropbox > 0:
            logging.info('######## Starting download of ' +
                         str(num_of_files_in_dropbox) + ' files:')
            print('######## Starting download of ' +
                  str(num_of_files_in_dropbox) + ' files:')
            current_file_number = 0
            # download images from Dropbox to the local folder
            for entry in entries:
                image_name = entry.name
                source = DROPBOX_SOURCE_PATH + image_name
                dest = RAW_IMAGES_PATH + image_name
                # download the image from source to dest
                dbx.files_download_to_file(dest, source)
                current_file_number = current_file_number + 1
                # check if the file exists in the local folder (successful download)
                if not os.path.exists(dest):
                    logging.warning('Could not download image no. ' +
                                    str(current_file_number) +
                                    ', image name: ' + image_name)
                    print('Could not download image no. ' +
                          str(current_file_number) +
                          ', image name: ' + image_name)
                    failed_download_images.append(image_name)
                # if the download succeeded, remove the file from Dropbox
                else:
                    logging.info('Successfully downloaded image ' +
                                 str(current_file_number) + ' out of ' +
                                 str(num_of_files_in_dropbox))
                    print('Successfully downloaded image ' +
                          str(current_file_number) + ' out of ' +
                          str(num_of_files_in_dropbox))
                    dbx.files_delete_v2(source)
        else:
            logging.info('There are no files to download from Dropbox')