def parse(self):
    """Retrieve the page at ``self.url`` and parse it into instance fields.

    Fields populated:
        self.name      -- text of the first <h1>, without any parenthesised
                          suffix (may include brewery/brand and/or beer)
        self.price     -- price (USD), 0.0 when it cannot be read
        self.volume    -- volume (Gallons) taken from the parenthesised part
                          of the heading, 0.0 when absent or unreadable
        self.num_avail -- number of available kegs, 0 when it cannot be read
        self.desc      -- keg description, '' when it cannot be read

    The method is a no-op when ``self.parsed`` is already truthy.  The flag
    is set before the page is fetched, so a failing ``get_html`` call is not
    retried by a later ``parse()``.

    Each field is scraped best-effort: any exception raised while extracting
    a field is swallowed and the field falls back to its default.
    """
    if self.parsed:
        return

    self.parsed = True

    html = get_html(self.url)

    # Attempt to get name and volume
    try:
        self.name = html.xpath('//h1/text()')[0].strip()
        if '(' in self.name and ')' in self.name:
            split_name = self.name.split('(')
            self.name = split_name[0].strip()
            raw_volume = split_name[1].strip(')').strip()
            # Keep only digits and decimal points.  Built with str.join so
            # the result is a string on every Python version (filter() on a
            # str returns an iterator on Python 3, which can never be
            # converted with float()).
            volume = ''.join(
                ch for ch in raw_volume if ch == '.' or is_num(ch))
            if is_num(volume):
                self.volume = float(volume)
            else:
                self.volume = 0.0
        else:
            self.volume = 0.0
    except Exception:
        self.name = ''
        self.volume = 0.0

    # Attempt to get price
    try:
        price_text = html.xpath(
            '//span[@class="ProductDetailItemPrice"]/text()')[0]
        self.price = float(price_text.strip().strip('$'))
    except Exception:
        self.price = 0.0

    # Attempt to get number of available kegs
    try:
        # The first whitespace-separated token of the first <em> is the count.
        self.num_avail = int(html.xpath('//em/text()')[0].strip().split()[0])
    except Exception:
        self.num_avail = 0

    # Attempt to get description
    try:
        self.desc = html.xpath(
            '//td[@class="ProductDetailCell"]/p/text()')[0].strip()
    except Exception:
        self.desc = ''
def tokenize_sent(sent, lemtzr, stopword):
    """Turn a sentence into a list of normalised, lemmatised tokens.

    Steps, in order:
      1. strip and lower-case ``sent`` and pass it to ``tokenize``;
      2. join the tokens with spaces and replace every run of characters
         outside ``[A-Za-z0-9]`` with a single space;
      3. split on whitespace and drop tokens contained in ``stopword``;
      4. replace tokens for which ``is_num`` is truthy with ``'<NUM>'``;
      5. lemmatise each remaining token with ``lemtzr.lemmatize``.

    Returns the resulting list of tokens.
    """
    raw_tokens = tokenize(sent.strip().lower())
    cleaned = re.sub(r'[^A-Za-z0-9]+', ' ', ' '.join(raw_tokens))
    kept = [word for word in cleaned.split() if word not in stopword]
    normalised = ['<NUM>' if is_num(word) else word for word in kept]
    return [lemtzr.lemmatize(word) for word in normalised]
async def handle_cli(self, cli_ws: WebSocketConn, _: str):
    """Serve the command-and-control menu on ``cli_ws`` until it disconnects.

    Each round sends ``CLI_OPTIONS`` and waits for a non-empty reply:

    * ``"0"``                      -- reply with the database summary;
    * one number                   -- ``self.start_bash(cli_ws, number)``;
    * several space-separated
      numbers                      -- ``self.execute_commands(cli_ws, numbers)``;
    * anything else                -- reply ``"Unknown input"``.

    The second positional argument is unused.  The loop ends (and the
    closure is logged) when ``ConnectionClosedError`` is raised.
    """
    logger.info("Command and control connection established")
    try:
        while True:
            await cli_ws.send(CLI_OPTIONS)

            # To not care about empty lines
            choice = None
            while not choice:
                choice = (await cli_ws.recv())

            if choice == "0":
                await cli_ws.send(self.ctx.get_database_summary())
                continue

            # Validate the input.  Every token must be non-empty and numeric;
            # the predicate is evaluated explicitly so that empty tokens
            # (e.g. from a trailing or doubled space) are rejected here
            # instead of reaching int("") below.
            nums = choice.split(" ")
            if any(not x or not is_num(x) for x in nums):
                await cli_ws.send("Unknown input")
                continue
            # NOTE(review): if is_num accepts values int() cannot parse
            # (e.g. "1.5"), int() below can still raise ValueError -- confirm
            # against is_num's definition.

            # Start bash with this client
            if len(nums) == 1:
                await self.start_bash(cli_ws, int(nums[0]))
                continue

            # Execute commands
            await self.execute_commands(cli_ws, [int(x) for x in nums])
    except ConnectionClosedError:
        logger.info("Command and control connection closed")
def remove_periods(text):
    """Split every non-numeric item of ``text`` on '.' and flatten the result.

    Items for which ``is_num`` is truthy are kept whole, so decimal points
    inside numbers are not removed.  Other items are replaced by the pieces
    returned by ``item.split('.')`` (which may include empty strings).

    Returns a new list; ``text`` itself is not modified.
    """
    pieces = []
    for token in text:
        if is_num(token):
            pieces.append(token)
        else:
            pieces.extend(token.split('.'))
    return pieces
def load_stopwords(idf_path):
    """Build a stop-word set from the JSON mapping stored at ``idf_path``.

    The file is expected to hold a JSON object mapping keys to sortable
    values.  Keys that are punctuation or numbers (per ``is_punctuation`` /
    ``is_num``) have their value forced to 0.  The entries are then ordered
    by value, highest first, and the keys at positions 1..50 are returned.

    Returns:
        set: up to 50 keys (fewer when the file holds fewer entries).
    """
    # Context manager so the file handle is closed deterministically.
    with open(idf_path) as idf_file:
        idf_raw_dict = json.load(idf_file)

    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for k in idf_raw_dict.keys():
        if is_punctuation(k) or is_num(k):
            idf_raw_dict[k] = 0

    # Ascending sort followed by a reversal (rather than reverse=True) keeps
    # the original ordering among entries with equal values.
    wordlst = sorted(idf_raw_dict.items(), key=lambda x: x[1])[::-1]

    # Position 0 is skipped: ALL_DOC_NUM is not a token (presumably it
    # carries the highest value and therefore sorts first -- verify against
    # the file format).
    return set(x[0] for x in wordlst[1:51])
def parser(node, level=1):
    """Render the tree rooted at ``node`` as indented text.

    A node whose type prints as ``<class 'nodes.DecisionNode'>`` is rendered
    as a question line followed by its true (``l_child``) and false
    (``r_child``) subtrees, each prefixed with ``level`` spaces.  The question
    uses ``>=`` when ``helper.is_num`` accepts the node's wedge value and
    ``=`` otherwise.  Any other node is rendered as a prediction line built
    from ``node.get_prediction()``.

    Returns the rendered text, terminated by a newline.
    """
    if str(type(node)) != "<class 'nodes.DecisionNode'>":
        return "Predict: " + str(node.get_prediction()) + "\n"

    if helper.is_num(node.question.wedge):
        comparators = [">=", "<"]
    else:
        comparators = ["=", "!="]

    pieces = [
        "Question: Is ",
        str(node.question.col_index),
        "th Column ",
        comparators[0],
        " ",
        str(node.question.wedge),
        "\n",
    ]
    indent = level * " "
    pieces.append(indent + "T: " + parser(node.l_child, level + 1))
    pieces.append(indent + "F: " + parser(node.r_child, level + 1))
    return "".join(pieces)
def read_ec_to_score(ec_preds_file):
    """Read a tab-separated EC prediction file into an EC -> score mapping.

    Blank lines are skipped.  For every other line the first column is the
    EC; the score is the last column when ``utils.is_num`` accepts it and the
    second-to-last column otherwise.  Each (EC, score) pair is handed to
    ``add_to_score_dict`` to be recorded in the returned dictionary.
    """
    scores_by_ec = {}
    with open(ec_preds_file) as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            fields = record.split("\t")
            # Fall back to the previous column when the last one is not a number.
            score_column = -1 if utils.is_num(fields[-1]) else -2
            add_to_score_dict(scores_by_ec, fields[0], float(fields[score_column]))
    return scores_by_ec
def write_split_objective(reaction_equation, bounds, writer):
    """Write one temporary objective reaction per left-hand-side metabolite.

    The left-hand side of ``reaction_equation`` is everything before the
    first ``-->`` (or, when that arrow is absent, before the first ``<->``).
    Its whitespace-separated tokens are scanned; ``+`` signs and tokens
    accepted by ``utils.is_num`` are ignored.  For every remaining token a
    ``TEMP_obj_<token>`` line is written to ``writer`` and a matching
    ``R_arch_nec_import_<token>`` line is stored for later use.

    ``bounds`` is accepted but not used.

    Returns:
        (list, dict): the temporary objective names in order of appearance,
        and a mapping from each temporary objective name to its import
        reaction line.
    """
    arrow = "-->" if "-->" in reaction_equation else "<->"
    left_side = reaction_equation.partition(arrow)[0]

    objective_names = []
    import_lines = {}
    for token in left_side.split():
        if token == "+" or utils.is_num(token):
            continue
        objective = "TEMP_obj_" + token
        objective_names.append(objective)
        import_lines[objective] = "{}:\t{} -->\t[-1000, 0]".format(
            "R_arch_nec_import_" + token, token)
        writer.write("{}:\t{} -->\t[0, 1000]\n".format(objective, token))
    return objective_names, import_lines
def read_and_split_conf_preds(ec_preds_file, high_cutoff, low_cutoff):
    """Split tab-separated EC predictions into confidence tiers.

    Blank lines are skipped.  On every other line the first column is the
    EC; the score is the last column when ``utils.is_num`` accepts it and the
    second-to-last column otherwise; the columns between the EC and the score
    are re-joined with tabs to form the gene.

    * A score above ``high_cutoff`` marks the EC as high-confidence and
      records the gene for it.
    * A score above ``low_cutoff`` marks the EC as (at least) low-confidence
      and files the gene under that EC and score.

    For ECs that never reached the high cutoff, only the genes filed under
    the EC's highest score are then recorded.

    Returns:
        (set, set, dict): high-confidence ECs, ECs above the low cutoff, and
        the EC -> gene mapping built via ``utils.add_to_dict``.
    """
    confident_ecs = set()
    ecs_above_low = set()
    genes_by_ec = {}
    candidates_by_ec = {}

    with open(ec_preds_file) as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            fields = record.split("\t")
            # The score sits in the last column unless that column is non-numeric.
            score_column = -1 if utils.is_num(fields[-1]) else -2
            ec = fields[0]
            score = float(fields[score_column])
            gene = "\t".join(fields[1:score_column])

            if score > high_cutoff:
                confident_ecs.add(ec)
                utils.add_to_dict(genes_by_ec, ec, gene)
            if score > low_cutoff:
                ecs_above_low.add(ec)
                utils.add_to_dict_key_score_value(candidates_by_ec, ec, score, gene)

    # For the low-confidence predictions, only retain the genes predicting
    # an EC with the highest score.
    for ec, score_to_gene in candidates_by_ec.items():
        if ec in confident_ecs:
            continue
        best_score = max(score_to_gene.keys())
        for gene in score_to_gene[best_score]:
            utils.add_to_dict(genes_by_ec, ec, gene)

    return confident_ecs, ecs_above_low, genes_by_ec
if "{}|".format(k) in " ".join(data.keys()): data.pop(k) else: # project, resize, image_extents are not there # so remove their children for key in data.keys(): if k in key: data.pop(key) # this dictionary will hold the output out_dict = {} for k, v in data.iteritems(): # all values coming in from the post request # are unicode, convert those values which # should be int or float tdict = gen_nested_dict(k.split("|"), is_num(v)) # deep_update updates the dictionary deep_update(out_dict, tdict) # MODIS only receive l1 or stats modis_list = ['l1'] if 'stats' in landsat_list: modis_list.append('stats') # we dont need these values returned by the available-products query if 'date_restricted' in scene_dict_all_prods: scene_dict_all_prods.pop('date_restricted') for key in scene_dict_all_prods: if 'mod' in key or 'myd' in key: scene_dict_all_prods[key]['products'] = modis_list
def evaluar(self, maquina):
    """Evaluate this binary operation and push the result onto ``maquina``.

    Each operand (``self.izquierda`` and ``self.derecha``) is resolved as
    follows, left operand first:

    * a ``str`` is treated as a variable name and looked up with
      ``maquina.obtener_valor_maq``;
    * a value accepted by ``is_num`` is used as is;
    * anything else is treated as a sub-expression: its own ``evaluar`` is
      called and the value is taken from ``maquina.pop_resultado()``.

    The operator in ``self.hoja`` is then applied and the outcome is passed
    to ``maquina.push_resultado``.

    Raises:
        BinOpError: if ``self.hoja`` is not one of the supported operators.
    """
    # --- left operand ---------------------------------------------------
    if isinstance(self.izquierda, str):
        logger.debug(
            "Buscamos valor de variable {var}".format(var=self.izquierda))
        r_izq = maquina.obtener_valor_maq(self.izquierda)
        logger.debug("Valor de r_izq es {}".format(r_izq))
    elif is_num(self.izquierda):
        logger.debug("Valor es numero {}".format(self.izquierda))
        r_izq = self.izquierda
    else:
        logger.debug("Valor es de tipo {t}".format(t=type(self.izquierda)))
        logger.debug("Evaluando lado izquierdo")
        self.izquierda.evaluar(maquina)
        r_izq = maquina.pop_resultado()
        logger.debug("Resultado {r}".format(r=r_izq))

    # --- right operand --------------------------------------------------
    if isinstance(self.derecha, str):
        logger.debug("Buscamos en derecha la variable {var}".format(
            var=self.derecha))
        # Look up the RIGHT operand's name; using the left operand's name
        # here would make `a - b` evaluate as `a - a`.
        r_der = maquina.obtener_valor_maq(self.derecha)
        logger.debug("Tenemos como resultado {r}".format(r=r_der))
    elif is_num(self.derecha):
        logger.debug("Derecha es numero {d}".format(d=self.derecha))
        r_der = self.derecha
    else:
        logger.debug("Valor es de tipo {t}".format(t=type(self.derecha)))
        logger.debug("Evaluando derecha")
        self.derecha.evaluar(maquina)
        r_der = maquina.pop_resultado()
        logger.debug("Valor de r_der es {}".format(r_der))

    # --- apply the operator ---------------------------------------------
    logger.debug("Operador es {}".format(self.hoja))
    if self.hoja == '+':
        temp = r_izq + r_der
    elif self.hoja == '-':
        temp = r_izq - r_der
    elif self.hoja == '/':
        temp = r_izq / r_der
    elif self.hoja == '*':
        temp = r_izq * r_der
    elif self.hoja == '%':
        temp = r_izq % r_der
    elif self.hoja == '||':
        # Both operands are already evaluated; `or` only selects the value.
        temp = r_izq or r_der
    elif self.hoja == '&&':
        temp = r_izq and r_der
    elif self.hoja == '<':
        temp = r_izq < r_der
    elif self.hoja == '<=':
        temp = r_izq <= r_der
    elif self.hoja == '>':
        temp = r_izq > r_der
    elif self.hoja == '>=':
        temp = r_izq >= r_der
    elif self.hoja == '==':
        temp = r_izq == r_der
    elif self.hoja == '!=':
        temp = r_izq != r_der
    else:
        raise BinOpError("Error en operador {}".format(self.hoja))

    logger.debug("Pushing resultado {}".format(temp))
    maquina.push_resultado(temp)