Esempio n. 1
0
    def process_all(self):
        """Run the full extract / transform / load pass over all products.

        Rows are pulled from an ``Extractor`` in batches; for every product a
        tf value is computed with ``Transformer.get_tf`` and queued on a
        loader pipeline, which is executed once per batch.  After the last
        batch ``self.loader.count_df()`` and ``self.loader.count_tf_idf()``
        are called with the product count reported by the extractor.

        Any exception is printed and swallowed (best effort), and nothing is
        returned.  Once ``connect()`` has succeeded the extractor is always
        closed, including when a later step fails.
        """
        try:
            print('process_all ... begin')

            extractor = Extractor(self.language)
            transformer = Transformer()
            extractor.connect()

            # Everything that uses the open connection lives in this inner
            # try so that close() runs on both the success and error paths.
            try:
                num_of_products = extractor.get_num_of_products()

                extractor.execute()
                num_of_rows = 10  # batch size requested per fetch
                rows = extractor.get_next_batch(num_of_rows)
                pipeline = self.loader.create_pipeline()

                # An empty batch signals that the extractor is exhausted.
                while len(rows) > 0:

                    for product in rows:
                        print('\n {}'.format(product))

                        tf = transformer.get_tf(product)
                        print('len tf: {}'.format(len(tf)))
                        self.loader.insert_tf(product['id'], tf, pipeline)

                    # Flush the queued inserts once per batch.
                    pipeline.execute()
                    rows = extractor.get_next_batch(num_of_rows)
            finally:
                extractor.close()

            self.loader.count_df()
            self.loader.count_tf_idf(num_of_products)

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
Esempio n. 2
0
    def process_all(self):
        """Extract every selected row, transform it and hand it to the loader.

        Rows are fetched from an ``Extractor`` in batches of 20.  Each row is
        registered through ``CourierClicoh.add_product``; the result and the
        row are turned into an output row by ``Transformer.get_csv_row``, and
        each batch is written with ``LoaderCsv.write_rows``.  Used for
        "Kill and Fill" runs.

        NOTE(review): the previous docstring described the source as the
        MySQL ``product_translation`` table and the target as Redis, but this
        code writes through ``LoaderCsv`` -- confirm which is current.

        Raises:
            Exception: any error is printed and then re-raised unchanged.
                Once ``connect()`` has succeeded the extractor is closed
                before the error propagates.
        """
        try:
            extractor = Extractor()
            extractor.connect()

            # Inner try guarantees the connection is released even when a
            # later step fails and the exception is re-raised below.
            try:
                num_of_rows = 20  # batch size requested per fetch
                extractor.execute()
                rows = extractor.get_next_batch(num_of_rows)
                transformer = Transformer()
                courier = CourierClicoh()
                loader = LoaderCsv()

                # An empty batch signals that the extractor is exhausted.
                while len(rows) > 0:
                    products = []
                    for row in rows:
                        print('id : {}'.format(row['id']))
                        j = courier.add_product(row)
                        products.append(transformer.get_csv_row(j, row))

                    loader.write_rows(products)
                    rows = extractor.get_next_batch(num_of_rows)
            finally:
                extractor.close()

            print('\n ETL.process_all() ... end')

        except Exception as e:
            print('ETL.process_all(), error: {}'.format(e))
            raise
Esempio n. 3
0
def c_01():
    """Case 1: fetch rows from the extractor in batches of 2 and print them.

    Errors are printed and swallowed so the closing "end of case" line is
    always emitted.  Once ``connect()`` has succeeded the extractor is closed
    even when a later step fails.
    """
    try:
        extractor = Extractor()
        extractor.connect()

        # Inner try ensures the connection is released on the error path too.
        try:
            extractor.execute()
            rows = extractor.get_next_batch(num_of_rows=2)

            # An empty batch signals that the extractor is exhausted.
            while len(rows) > 0:

                print('\n ut_01.c_01(), looping BATCH of rows')

                for row in rows:
                    print('\n {}'.format(row))

                rows = extractor.get_next_batch(num_of_rows=2)
        finally:
            extractor.close()

    # Use a distinct name for the exception: the original bound it to `e`,
    # the same name that held the extractor.
    except Exception as err:
        print('ut_01.c_01(), error: {}'.format(err))

    print('\n end of case 1.')