Esempio n. 1
0
 def __init__(self, bread_crumb_idx=-2, start_row_idx=None):
     """Store the two index settings and create the helper objects.

     Args:
         bread_crumb_idx: Stored unchanged on ``self.bread_crumb_idx``.
         start_row_idx: Stored unchanged on ``self.start_row_idx``. It is
             required; the ``None`` default exists only because a
             parameter without a default may not follow one that has a
             default (the previous signature was a ``SyntaxError``).

     Raises:
         TypeError: If ``start_row_idx`` is not supplied.
     """
     # Fail fast, before any helper object is created.
     if start_row_idx is None:
         raise TypeError(
             "__init__() missing required argument: 'start_row_idx'")
     super().__init__()
     self.bread_crumb_idx = bread_crumb_idx
     self.start_row_idx = start_row_idx
     self.docs = ah.MongoDocCreator()
     self.text_to = lp.Parser()
     self.err_to = ErrorHandler()
Esempio n. 2
0
    def __init__(self):
        """Initialise the base class, then create the helper objects.

        After construction the instance exposes ``docs``
        (``ah.MongoDocCreator``), ``text_to`` (``lp.Parser``) and
        ``err_to`` (``ErrorHandler``).
        """
        super().__init__()

        # NOTE: the start-up reset of logs/scrapy_log.log that was commented
        # out here is not performed.
        self.docs = ah.MongoDocCreator()
        self.text_to = lp.Parser()
        self.err_to = ErrorHandler()
Esempio n. 3
0
    def simple_query(self):
        """Run the free-text query ``self.q`` against ``db.partdb``.

        ``self.q`` is parsed with ``lp.Parser`` and the parse result is handed
        to ``ah.MongoDocCreator``; the first element it yields drives the
        search. Every term of that element is tagged with ``TextBlob`` and the
        lower-cased words whose tag is one of ``kept_tags`` form the ``$all``
        filter on the ``word`` field. If the element also has the ``param``,
        ``from``, ``to`` and ``unit`` keys, a numeric range condition and the
        unit are added to the filter; otherwise only the words are used.

        Returns:
            A ``Parts`` built from the cursor returned by
            ``db.partdb.find``, the element's first term and ``self.q``.

        Raises:
            IndexError: If ``make_mongoelem()`` yields nothing, or (assuming
                ``elem['term']`` is a list) the element has no terms.
        """
        parser = lp.Parser()
        parser.parse(self.q)
        query = ah.MongoDocCreator(ext_elems_to_analyze=[parser.result])
        # There should be only one element in this list. See analyze_html.py
        elem = list(query.make_mongoelem())[0]
        print('elem: ', elem)

        # Part-of-speech tags whose words are kept as search keywords.
        kept_tags = frozenset(
            ('JJ', 'NNP', 'NN', 'VBG', 'RB', 'NNS', 'VBD', 'VBN'))
        blobs = []
        for trm in elem['term']:
            blobs.extend(
                tagged[0].lower() for tagged in TextBlob(trm).tags
                if tagged[1] in kept_tags)

        query_dict = {'word': {'$all': blobs}}
        try:
            # Try if 'from' and 'to' fields exist. In other words if there
            # are numbers in the query.
            param = elem['param']
            range_from = elem['from']
            range_to = elem['to']
            unit = elem['unit']
        except KeyError:
            # No numeric information: search on the words alone.
            pass
        else:
            # A truthy, non-list param collapses the range to its midpoint.
            if not isinstance(param, list) and param:
                range_from = range_to = (range_from + range_to) / 2

            # Keys are inserted in the same order as before so the printed
            # query stays identical.
            if range_to == inf:
                # Open upper bound: only constrain the stored 'to'.
                query_dict['to'] = {'$gte': range_from}
            elif range_from == -inf:
                # Open lower bound: only constrain the stored 'from'.
                query_dict['from'] = {'$lte': range_to}
            else:
                # The stored range must enclose the requested one.
                query_dict['from'] = {'$lte': range_from}
                query_dict['to'] = {'$gte': range_to}
            query_dict['unit'] = unit
        print('qd: %s\n' % query_dict)

        # Projection: drop the Mongo id and the keyword list from results.
        parts_found = db.partdb.find(
            query_dict,
            {
                '_id': False,
                'word': False,
            })
        return Parts(parts_found=parts_found,
                     user_term=elem['term'][0],
                     q=self.q)
Esempio n. 4
0
 def __init__(self):
     """Initialise the base class, then create the helper objects.

     After construction the instance exposes ``docs``
     (``ah.MongoDocCreator``), ``text_to`` (``lp.Parser``) and
     ``err_to`` (``ErrorHandler``).
     """
     super().__init__()

     # One helper instance of each kind per object.
     self.docs = ah.MongoDocCreator()
     self.text_to = lp.Parser()
     self.err_to = ErrorHandler()