コード例 #1
0
ファイル: query.py プロジェクト: miandu/dynamodb-to-mysql
def output_items_random(cf,path,query,random_no):
    """Write a random sample of the items matching ``query`` to a CSV file.

    The query is run with ``random_no * 10`` as its size argument, up to
    ``random_no`` of the resulting ids are drawn at random, and one CSV row
    is written per item loaded for those ids. Nothing is written when the
    query reports no results.

    Args:
        cf: Configuration object forwarded to the query and storage helpers.
        path: Output directory; created if it does not exist yet.
        query: Query string. It is also used verbatim as the base name of
            the CSV file, so it must be usable as a file name.
        random_no: Maximum number of items to sample.
    """
    os.makedirs(path, exist_ok=True)
    total, items = query_items(cf, query, random_no * 10)
    if total <= 0:
        return

    candidate_ids = es_outputs_to_ids(items)
    # Bound the sample by the population actually sampled from, so that
    # random.sample never raises "Sample larger than population".
    sample_size = min(random_no, len(candidate_ids))
    selected_items_ids = random.sample(candidate_ids, sample_size)

    # os.path.join keeps the file inside ``path`` even when the caller
    # passes the directory without a trailing separator.
    csv_path = os.path.join(path, query + ".csv")
    with open(csv_path, 'w') as csvfile:
        for item in general_storage.get_items_by_ids(cf, selected_items_ids):
            original = item['original_data']
            # Tolerate records without a 'user' entry: the name column is
            # then written as None instead of raising AttributeError.
            user = original.get('user') or {}
            utils.write_to_csv(
                csvfile,
                [
                    item['post_id'],
                    item['object_id'],
                    original['created_at'],
                    user.get('name'),
                    item['message'],
                ],
            )
コード例 #2
0
ファイル: utils.py プロジェクト: miandu/dynamodb-to-mysql
def process_sqs_rerun(cf,queue_name,process,batch_size=100):
    """Read messages from an SQS queue and re-run ``process`` on their items.

    The ids carried by the messages are resolved to stored items through
    ``general_storage.get_items_by_ids`` and handed to ``process_rerun``.

    Args:
        cf: Configuration object forwarded to the storage and rerun helpers.
        queue_name: Name of the SQS queue to read from.
        process: Processing callable forwarded to ``process_rerun``.
        batch_size: Batch size forwarded to ``process_rerun``.

    Returns:
        The ``(counter, error)`` pair returned by ``process_rerun``, or
        ``(0, 0)`` when no message was read.
    """
    queue_url = sqs.get_url_by_name(queue_name)
    message, handler = sqs.read_message(queue_url)

    # Guard clause: covers both an empty result and a missing (None) one.
    if not message:
        print("No message was found")
        return 0, 0

    print("Processing sqs items")
    print(len(message))
    items = general_storage.get_items_by_ids(cf, [x['id'] for x in message])
    print(len(items))
    counter, error = process_rerun(cf, items, process, batch_size)
    print(counter, error)
    # The message is deleted only when at least one item was processed;
    # presumably an unprocessed message is meant to stay on the queue.
    if handler and counter > 0:
        sqs.delete_message(queue_url, handler)
    return counter, error
コード例 #3
0
if __name__ == "__main__":
    # Command-line entry point: query items and insert them into MySQL.
    #   --type own      inserts the configured user's own posts matching the
    #                   optional --query, together with each post's comments.
    #   --type general  inserts every item matching --query.
    parser = argparse.ArgumentParser(description='Normalizer for twitter between DynamoDB and mysql')
    parser.add_argument('config', type=str, help='an config file for normalizer')
    parser.add_argument('--query', type=str, default=None, help='query to get data for normalizer')
    parser.add_argument('--type', type=str, default="own", help='general or own. general:get everything using query; own:get own post and all replies')
    args = parser.parse_args()

    # The positional argument is an importable module name; that module must
    # expose a ``Config`` class.
    config = __import__(args.config)
    cf = config.Config()

    if args.type == "own":
        # Restrict the (optional) user query to the configured user's posts.
        own_filter = "user_id:%s AND object_type:post" % (cf.twitter_user_id,)
        if args.query:
            query_str = "%s AND %s" % (args.query, own_filter)
        else:
            query_str = own_filter

        total, posts = query.query_items(cf, query_str)
        if total > 0:
            for post in posts:
                post_with_comments = general_storage.get_item_and_comments(cf, post["id"])
                insert_dynamodb_item_into_mysql(cf, post_with_comments["item"])
                for comment in post_with_comments["comments"]:
                    insert_dynamodb_item_into_mysql(cf, comment)

    elif args.type == "general":
        # This branch previously used an undefined ``items`` name and always
        # failed with NameError; the items now come from running --query.
        if not args.query:
            parser.error("--query is required when --type is general")

        total, items = query.query_items(cf, args.query)
        if total > 0:
            db_items = general_storage.get_items_by_ids(cf, query.es_outputs_to_ids(items))
            for db_item in db_items:
                insert_dynamodb_item_into_mysql(cf, db_item)