# Sample records handed to the JSON error-dump test at the bottom of this section.
test_dict = [
    {"north": 1.22, "east": 2.3, "name": "Lebron James"},
    {"north": 1.35, "east": 3.5, "name": "Kevin Durant"},
]

# Disabled scratch code for reading the JSON fixture by hand:
# test_json_path = "data/5minute.json"
#
# test = json.load(open(test_json_path, encoding="utf8"))
#
# with open(test_json_path, encoding="utf8") as in_json:
#     for line in in_json:
#         if line != "\n":
#             print(json.loads(line))


def _is_selected(test_name):
    """Return True when *test_name* or the catch-all "all" is in RUN_THESE_TESTS."""
    return test_name in RUN_THESE_TESTS or "all" in RUN_THESE_TESTS


# Import a CSV file, using "csv_import" as the third element of the connection tuple.
if _is_selected("import_csv"):
    twitter_mongo = (test_mongo[0], test_mongo[1], "csv_import")
    data_import.import_files(
        "data/subset.csv",
        mongo_connection=twitter_mongo,
        mongo_address=mongo_address,
    )

# Import a JSON file, using "json_import" as the third element of the connection tuple.
if _is_selected("import_json"):
    twitter_mongo = (test_mongo[0], test_mongo[1], "json_import")
    data_import.import_files(
        "data/5minute.json",
        mongo_connection=twitter_mongo,
        mongo_address=mongo_address,
    )

# Dump the list fixture through dump_errors using a .csv source name.
if _is_selected("dump_csv"):
    data_import.dump_errors(
        test_list,
        "test_csv",
        "hello/test/1.csv",
        test_output + "csv_dump/",
    )

# Dump the dict fixture through dump_errors using a .json source name.
# NOTE(review): this also targets the "csv_dump/" folder - confirm that is intentional.
if _is_selected("dump_json"):
    data_import.dump_errors(
        test_dict,
        "test_j",
        "hello/test/1.json",
        test_output + "csv_dump/",
    )
("192.168.0.97:28003", "twitter", "address"))

# set the destination mongodb database
# NOTE(review): presumably (host:port, database, collection) - confirm against data_import.import_files
twitter_mongo = ("192.168.0.97:30000", "twitter", "tweets")

# point to datafiles
harvested_tweets = "data/input/final_data/Tweets_Apr12_Aug14.csv"
april_tweets = "data/input/final_data/GNIP_April.csv"
aug_oct_tweets = "data/input/final_data/GNIP_August_October.csv"

# folder to put sliced up csv files (this will allow for parallel inserts and address lookups)
output_folder = "data/input/new_data/chunks/"

# convert the file names into a tuple to loop through
files = (harvested_tweets, april_tweets, aug_oct_tweets)

# slice up each file, logging the start time of each one
# NOTE(review): num_rows=-1 presumably means "no row limit" and chunk_size=10000 the rows
# per output chunk - confirm against data_import.create_partition_csv
for file_name in files:
    print("Start slicing: %s at: %s" % (file_name, datetime.now()))
    data_import.create_partition_csv(file_name, output_folder=output_folder, num_rows=-1, chunk_size=10000)

# insert all files from the output folder. Note that the first argument can be a file as well,
# in which case the function imports that file only
data_import.import_files(output_folder, mongo_connection=twitter_mongo, mongo_address=mongo_address)