Esempio n. 1
0
            Body=json.dumps(out),
            ContentType='application/json; charset=utf-8')

    def generate_json(self, rows):
        """Upload ``rows`` to S3 as the JSON document ``out.json``.

        Every row is converted with ``dict(row)``; the resulting list is
        stored under the ``'ads'`` key of a JSON object, which is written to
        the bucket named by ``conf.S3_BUCKET`` with a JSON content type.

        Args:
            rows: Iterable of rows, each of which must be accepted by
                ``dict()`` (a mapping or a sequence of key/value pairs).
        """
        # Serialize the rows supplied by the caller instead of reaching for
        # module-level state, so the method also works when this module is
        # imported rather than executed as a script.
        payload = json.dumps({'ads': [dict(row) for row in rows]})

        bucket = boto3.resource('s3').Bucket(conf.S3_BUCKET)
        bucket.put_object(
            Key='out.json',
            Body=payload,
            ContentType='application/json; charset=utf-8')


if __name__ == '__main__':
    # Output directory: first command-line argument when given, else 'ads'.
    OUT_DIR = sys.argv[1] if len(sys.argv) > 1 else 'ads'

    # Run the crawl and hand its run id and ads to the S3 upload step.
    CRAWLER = Crawler(OUT_DIR)
    RUN_ID, ADS = CRAWLER.crawl()
    CRAWLER.upload_to_s3(RUN_ID, ADS)

    # SAVER is deliberately kept as a module-level name: other code in this
    # file may look it up as a global.
    SAVER = Saver(cred_file='client_secret.json')
    SAVER.open_workbook('Sleeping Giants Data')

    # Insert one row per ad, with the columns in this fixed order.
    for AD in ADS:
        SAVER.insert_row(
            [AD[key]
             for key in ('orig', 'ad', 'twitter_account', 'curr_url')])

    # Publish everything the saver currently holds as JSON.
    CRAWLER.generate_json(SAVER.get_rows())