Ejemplo n.º 1
0
    def process_all(self):
        '''Run the extract / transform / load pass over all products.

        Connects an ``Extractor`` built from ``self.language``, reads the
        products in batches of 10 rows, computes ``transformer.get_tf(product)``
        for each product and queues it through ``self.loader.insert_tf`` on a
        loader pipeline that is executed once per batch.  After extraction
        finishes, ``self.loader.count_df()`` and
        ``self.loader.count_tf_idf(num_of_products)`` are called.

        Any exception is caught and printed; nothing is re-raised and the
        method returns ``None``.  The extractor is closed even when a step
        after ``connect()`` fails.
        '''
        try:
            print('process_all ... begin')

            extractor = Extractor(self.language)
            transformer = Transformer()
            extractor.connect()

            # Everything after a successful connect() is guarded so the
            # extractor is released on failure as well as on success.
            try:
                num_of_products = extractor.get_num_of_products()

                extractor.execute()
                num_of_rows = 10  # batch size requested from the extractor
                rows = extractor.get_next_batch(num_of_rows)
                pipeline = self.loader.create_pipeline()

                # An empty batch signals that there is nothing left to read.
                while len(rows) > 0:

                    for product in rows:
                        print('\n {}'.format(product))

                        # presumably tf = term frequencies of the product;
                        # verify against Transformer.get_tf
                        tf = transformer.get_tf(product)
                        print('len tf: {}'.format(len(tf)))
                        self.loader.insert_tf(product['id'], tf, pipeline)

                    # Execute the pipeline once per batch rather than per product.
                    pipeline.execute()
                    rows = extractor.get_next_batch(num_of_rows)
            finally:
                extractor.close()

            # Post-processing runs only after the extractor has been closed,
            # matching the original ordering.
            self.loader.count_df()
            self.loader.count_tf_idf(num_of_products)

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
Ejemplo n.º 2
0
def c_07():
    '''Print the number of products reported by the extractor.

    Builds an ``Extractor`` for language ``'es'``, connects it and prints
    the value returned by ``get_num_of_products()``.  Any exception is
    caught and printed; nothing is re-raised and the function returns
    ``None``.  The extractor is closed once it has been connected.
    '''
    try:
        extractor = Extractor(language='es')
        extractor.connect()
        try:
            n = extractor.get_num_of_products()
            print(n)
        finally:
            # Release the connection whether or not the query succeeded.
            extractor.close()
    except Exception as e:
        # The message names this function (it previously said 'c_06()').
        print('c_07(), error: {}'.format(e))