Example #1
0
    def parse(self):
        """Fetch ``self.url`` once and populate the keg fields from the page.

        Fields set on the instance:

            self.name       (May include brewery/brand and/or beer)
            self.price      (USD)
            self.volume     (Gallons)
            self.num_avail  (Kegs)
            self.desc       (Keg description)

        Each field is extracted on a best-effort basis: if its lookup fails
        for any reason the field falls back to an empty/zero default instead
        of raising.  Calling the method again is a no-op because
        ``self.parsed`` is set before the page is fetched.
        """
        if self.parsed:
            return

        self.parsed = True

        html = get_html(self.url)

        # Name and volume both come from the <h1> heading; the volume is
        # taken from the text after the first "(" when parentheses exist.
        try:
            self.name = html.xpath('//h1/text()')[0].strip()
            if '(' in self.name and ')' in self.name:
                split_name = self.name.split('(')
                self.name = split_name[0].strip()

                # Keep only digits and decimal points.  Built with join()
                # rather than filter() so the result is a string on both
                # Python 2 and Python 3 (filter() is lazy on Python 3).
                volume = ''.join(
                    ch for ch in split_name[1].strip(')').strip()
                    if ch == '.' or is_num(ch))
                self.volume = float(volume) if is_num(volume) else 0.0
            else:
                self.volume = 0.0
        except Exception:
            # Deliberate best-effort: any failure resets both fields.
            self.name = ''
            self.volume = 0.0

        # Price: text of the price span with the "$" sign stripped.  The
        # XPath is kept on one logical string; a backslash continuation
        # inside the literal would embed the next line's indentation in
        # the class name and the query would never match.
        try:
            price_text = html.xpath(
                '//span[@class="ProductDetailItemPrice"]/text()')[0]
            self.price = float(price_text.strip().strip('$'))
        except Exception:
            self.price = 0.0

        # Number of available kegs: first whitespace-separated token of
        # the first <em> text node.
        try:
            self.num_avail = int(html.xpath('//em/text()')[0].strip().split()[0])
        except Exception:
            self.num_avail = 0

        # Description: first paragraph text inside the product detail cell.
        try:
            self.desc = html.xpath(
                '//td[@class="ProductDetailCell"]/p/text()')[0].strip()
        except Exception:
            self.desc = ''
def tokenize_sent(sent, lemtzr, stopword):
    """Turn a sentence into a list of normalised, lemmatised tokens.

    The sentence is stripped, lower-cased and passed through ``tokenize``;
    every run of non-alphanumeric characters is then collapsed to a single
    space.  Tokens found in ``stopword`` are dropped, tokens accepted by
    ``is_num`` become the placeholder ``'<NUM>'``, and whatever remains is
    passed through ``lemtzr.lemmatize``.
    """
    joined = ' '.join(tokenize(sent.strip().lower()))
    cleaned = re.sub(r'[^A-Za-z0-9]+', ' ', joined)

    result = []
    for word in cleaned.split():
        if word in stopword:
            continue
        if is_num(word):
            word = '<NUM>'
        result.append(lemtzr.lemmatize(word))
    return result
Example #3
0
    async def handle_cli(self, cli_ws: WebSocketConn, _: str):
        """Serve the interactive command-and-control menu over ``cli_ws``.

        In a loop, sends ``CLI_OPTIONS`` and waits for a non-empty choice:

        * ``"0"`` replies with ``self.ctx.get_database_summary()``;
        * a single number is handed to ``self.start_bash``;
        * several whitespace-separated numbers are handed to
          ``self.execute_commands``;
        * anything else is answered with ``"Unknown input"``.

        The loop ends when the connection raises ``ConnectionClosedError``.
        The second positional argument is unused.
        """
        logger.info("Command and control connection established")
        try:
            while True:
                await cli_ws.send(CLI_OPTIONS)

                # To not care about empty lines
                choice = None
                while not choice:
                    choice = (await cli_ws.recv())

                if choice == "0":
                    await cli_ws.send(self.ctx.get_database_summary())
                    continue

                # Validate the input.  split() without an argument drops the
                # empty tokens produced by repeated or trailing spaces, and
                # all(...) checks every token explicitly (the previous
                # any(filter(...)) tested the truthiness of the rejected
                # tokens, so empty strings slipped through).
                nums = choice.split()
                if not nums or not all(is_num(x) for x in nums):
                    await cli_ws.send("Unknown input")
                    continue

                try:
                    ids = [int(x) for x in nums]
                except ValueError:
                    # NOTE(review): is_num may accept values int() rejects
                    # (e.g. decimals); treat those as invalid input rather
                    # than letting the handler die.
                    await cli_ws.send("Unknown input")
                    continue

                # Start bash with this client
                if len(ids) == 1:
                    await self.start_bash(cli_ws, ids[0])
                    continue

                # Execute commands
                await self.execute_commands(cli_ws, ids)
        except ConnectionClosedError:
            logger.info("Command and control connection closed")
Example #4
0
def remove_periods(text):  # doesn't remove decimal places
    """Split every non-numeric word in ``text`` on ``'.'``.

    ``text`` is iterated word by word.  Words accepted by ``is_num`` are
    kept whole (so decimal numbers survive); every other word is replaced
    by the pieces of ``word.split('.')``.  Returns the resulting flat list.
    """
    pieces = []
    for token in text:
        pieces.extend([token] if is_num(token) else token.split('.'))
    return pieces
Example #5
0
def load_stopwords(idf_path):
    """Derive a stop-word set from a JSON mapping of token -> score.

    The JSON object at ``idf_path`` is loaded, the score of every key
    accepted by ``is_punctuation`` or ``is_num`` is forced to 0, and the
    entries are ordered by descending score.  The set of keys at positions
    1..50 of that ordering is returned.

    NOTE(review): position 0 is skipped because, per the original comment,
    it is expected to be the ``ALL_DOC_NUM`` bookkeeping entry rather than
    a real token -- confirm against the file format.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(idf_path) as idf_file:
        idf_raw_dict = json.load(idf_file)

    for k in idf_raw_dict:
        if is_punctuation(k) or is_num(k):
            idf_raw_dict[k] = 0

    # Ascending sort followed by a reversal (not reverse=True): the two
    # differ in how ties are ordered, and the slice below depends on it.
    wordlst = sorted(idf_raw_dict.items(), key=lambda x: x[1])[::-1]
    stop_words_set = set(x[0]
                         for x in wordlst[1:51])  # ALL_DOC_NUM is not a token
    return stop_words_set
Example #6
0
def parser(node, level=1):
    """Render the tree rooted at ``node`` as indented text.

    A node whose type prints as ``<class 'nodes.DecisionNode'>`` produces a
    "Question" line followed by its true (``l_child``) and false
    (``r_child``) branches, each indented by ``level`` two-space steps.  The
    comparison shown is ``>=`` when ``helper.is_num`` accepts the question's
    ``wedge`` and ``=`` otherwise.  Any other node produces a single
    "Predict" line built from ``node.get_prediction()``.
    """
    if str(type(node)) != "<class 'nodes.DecisionNode'>":
        return "Predict: " + str(node.get_prediction()) + "\n"

    comparison = ">=" if helper.is_num(node.question.wedge) else "="
    indent = level * "  "

    parts = [
        "Question: Is " + str(node.question.col_index) + "th Column "
        + comparison + " " + str(node.question.wedge) + "\n",
        indent + "T: " + parser(node.l_child, level + 1),
        indent + "F: " + parser(node.r_child, level + 1),
    ]
    return "".join(parts)
Example #7
0
def read_ec_to_score(ec_preds_file):
    """Collect EC scores from a tab-separated predictions file.

    Blank lines are ignored.  For every other line the first column is the
    EC; the score is the last column when ``utils.is_num`` accepts it and
    the second-to-last column otherwise.  Each (ec, score) pair is recorded
    through ``add_to_score_dict`` and the resulting dict is returned.
    """
    scores = {}
    with open(ec_preds_file) as preds:
        for raw_line in preds:
            record = raw_line.strip()
            if not record:
                continue
            fields = record.split("\t")

            # Fall back one column when the final field is not numeric.
            score_field = fields[-1] if utils.is_num(fields[-1]) else fields[-2]
            add_to_score_dict(scores, fields[0], float(score_field))
    return scores
def write_split_objective(reaction_equation, bounds, writer):
    """Write one temporary objective line per left-hand-side metabolite.

    The left side of ``reaction_equation`` is everything before ``-->``
    (or before ``<->`` when no ``-->`` is present).  Each whitespace-
    separated element of it, other than ``+`` and anything accepted by
    ``utils.is_num``, gets a ``TEMP_obj_<elem>`` line written to ``writer``
    and a matching ``R_arch_nec_import_<elem>`` line stored for the caller.

    ``bounds`` is accepted but not used here.

    Returns:
        (list of temporary objective names, dict mapping each of those
        names to its import-reaction line).
    """
    arrow = "-->" if "-->" in reaction_equation else "<->"
    left_side = reaction_equation.split(arrow)[0]

    objective_names = []
    import_lines = {}
    for token in left_side.split():
        if token != "+" and not utils.is_num(token):
            objective_name = "TEMP_obj_" + token
            objective_names.append(objective_name)
            objective_line = "\t".join(
                [objective_name + ":", token + " -->", "[0, 1000]"])
            import_lines[objective_name] = "\t".join(
                ["R_arch_nec_import_" + token + ":", token + " -->",
                 "[-1000, 0]"])
            writer.write(objective_line + "\n")
    return objective_names, import_lines
Example #9
0
def read_and_split_conf_preds(ec_preds_file, high_cutoff, low_cutoff):
    """Split tab-separated EC predictions by two score cutoffs.

    Each non-blank line supplies an EC (first column), a score (last column
    when ``utils.is_num`` accepts it, otherwise second-to-last) and a gene
    (the columns between the EC and the score, re-joined with tabs).

    ECs scoring above ``high_cutoff`` are recorded with their genes.  ECs
    scoring above ``low_cutoff`` are tracked too; for those that never
    cleared ``high_cutoff``, only the genes stored under the EC's highest
    score are recorded.

    Returns:
        (ECs above ``high_cutoff``, ECs above ``low_cutoff``,
        EC -> gene mapping built through ``utils.add_to_dict``).
    """
    confident_ecs = set()
    candidate_ecs = set()
    ec_to_genes = {}
    candidates_by_score = {}

    with open(ec_preds_file) as preds:
        for raw_line in preds:
            record = raw_line.strip()
            if not record:
                continue
            fields = record.split("\t")

            # The score sits in the last column, or one column earlier when
            # the last column is not numeric; the gene spans what is between.
            score_idx = -1 if utils.is_num(fields[-1]) else -2
            ec = fields[0]
            score = float(fields[score_idx])
            gene = "\t".join(fields[1:score_idx])

            if score > high_cutoff:
                confident_ecs.add(ec)
                utils.add_to_dict(ec_to_genes, ec, gene)

            if score > low_cutoff:
                candidate_ecs.add(ec)
                utils.add_to_dict_key_score_value(candidates_by_score, ec,
                                                  score, gene)

    # For the low-confidence predictions, only retain the genes predicting
    # an EC with the highest score.
    for ec, score_to_gene in candidates_by_score.items():
        if ec not in confident_ecs:
            best_score = max(score_to_gene.keys())
            for gene in score_to_gene[best_score]:
                utils.add_to_dict(ec_to_genes, ec, gene)

    return confident_ecs, candidate_ecs, ec_to_genes
Example #10
0
            if "{}|".format(k) in " ".join(data.keys()):
                data.pop(k)
        else:
            # project, resize, image_extents are not there
            # so remove their children
            for key in data.keys():
                if k in key:
                    data.pop(key)

    # this dictionary will hold the output
    out_dict = {}
    for k, v in data.iteritems():
        # all values coming in from the post request
        # are unicode, convert those values which
        # should be int or float
        tdict = gen_nested_dict(k.split("|"), is_num(v))
        # deep_update updates the dictionary
        deep_update(out_dict, tdict)

    # MODIS only receive l1 or stats
    modis_list = ['l1']
    if 'stats' in landsat_list:
        modis_list.append('stats')

    # we dont need these values returned by the available-products query
    if 'date_restricted' in scene_dict_all_prods:
        scene_dict_all_prods.pop('date_restricted')

    for key in scene_dict_all_prods:
        if 'mod' in key or 'myd' in key:
            scene_dict_all_prods[key]['products'] = modis_list
Example #11
0
 def evaluar(self, maquina):
     """Evaluate this binary operation and push the result onto ``maquina``.

     Each operand (``self.izquierda`` and ``self.derecha``) is resolved the
     same way:

     * a ``str`` is treated as a variable name and looked up with
       ``maquina.obtener_valor_maq``;
     * a value accepted by ``is_num`` is used as is;
     * anything else is evaluated through its own ``evaluar`` and its
       result is taken from ``maquina.pop_resultado()``.

     The operator in ``self.hoja`` is then applied to the two values and
     the outcome is passed to ``maquina.push_resultado``.

     Raises:
         BinOpError: if ``self.hoja`` is not a supported operator.
     """
     # Left operand.
     if isinstance(self.izquierda, str):
         logger.debug(
             "Buscamos valor de variable {var}".format(var=self.izquierda))
         r_izq = maquina.obtener_valor_maq(self.izquierda)
         logger.debug("Valor de r_izq es {}".format(r_izq))
     elif is_num(self.izquierda):
         logger.debug("Valor es numero {}".format(self.izquierda))
         r_izq = self.izquierda
     else:
         logger.debug("Valor es de tipo {t}".format(t=type(self.izquierda)))
         logger.debug("Evaluando lado izquierdo")
         self.izquierda.evaluar(maquina)
         r_izq = maquina.pop_resultado()
         logger.debug("Resultado {r}".format(r=r_izq))

     # Right operand.
     if isinstance(self.derecha, str):
         logger.debug("Buscamos en derecha la variable {var}".format(
             var=self.derecha))
         # Look up the right-hand variable itself; the left operand's name
         # was previously used here, yielding the wrong value.
         r_der = maquina.obtener_valor_maq(self.derecha)
         logger.debug("Tenemos como resultado {r}".format(r=r_der))
     elif is_num(self.derecha):
         logger.debug("Derecha es numero {d}".format(d=self.derecha))
         r_der = self.derecha
     else:
         logger.debug("Valor es de tipo {t}".format(t=type(self.derecha)))
         logger.debug("Evaluando derecha")
         self.derecha.evaluar(maquina)
         r_der = maquina.pop_resultado()
         logger.debug("Valor de r_der es {}".format(r_der))

     temp = None
     logger.debug("Operador es {}".format(self.hoja))
     if self.hoja == '+':
         temp = r_izq + r_der
     elif self.hoja == '-':
         temp = r_izq - r_der
     elif self.hoja == '/':
         temp = r_izq / r_der
     elif self.hoja == '*':
         temp = r_izq * r_der
     elif self.hoja == '%':
         temp = r_izq % r_der
     elif self.hoja == '||':
         # Both operands are already evaluated, so this only selects a value.
         temp = r_izq or r_der
     elif self.hoja == '&&':
         temp = r_izq and r_der
     elif self.hoja == '<':
         temp = r_izq < r_der
     elif self.hoja == '<=':
         temp = r_izq <= r_der
     elif self.hoja == '>':
         temp = r_izq > r_der
     elif self.hoja == '>=':
         temp = r_izq >= r_der
     elif self.hoja == '==':
         temp = r_izq == r_der
     elif self.hoja == '!=':
         temp = r_izq != r_der
     else:
         raise BinOpError("Error en operador {}".format(self.hoja))
     logger.debug("Pushing resultado {}".format(temp))
     maquina.push_resultado(temp)
Example #12
0
            if "{}|".format(k) in " ".join(data.keys()):
                data.pop(k)
        else:
            # project, resize, image_extents are not there
            # so remove their children
            for key in data.keys():
                if k in key:
                    data.pop(key)

    # this dictionary will hold the output
    out_dict = {}
    for k, v in data.iteritems():
        # all values coming in from the post request
        # are unicode, convert those values which
        # should be int or float
        tdict = gen_nested_dict(k.split("|"), is_num(v))
        # deep_update updates the dictionary
        deep_update(out_dict, tdict)

    # MODIS only receive l1 or stats
    modis_list = ['l1']
    if 'stats' in landsat_list:
        modis_list.append('stats')

    # we dont need these values returned by the available-products query
    if 'date_restricted' in scene_dict_all_prods:
        scene_dict_all_prods.pop('date_restricted')

    for key in scene_dict_all_prods:
            if 'mod' in key or 'myd' in key:
                scene_dict_all_prods[key]['products'] = modis_list