def download_with_progress(url, out, user=None, password=None):
    logger.info(f"Downloading {url} as {out}")
    create_output_dir(out)

    auth = None
    if user is not None or password is not None:
        auth = (user, password)

    with open(out, 'wb') as f:
        response = requests.get(url, auth=auth, stream=True)
        total_length = response.headers.get('content-length')

        if total_length is None:  # no content length header
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                done = int(50 * dl / total_length)
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
            sys.stdout.write("\n")
            sys.stdout.flush()

    if out.endswith('.tar'):
        extract_tar(out)

    logger.info(f"Done downloading {url} as {out}")
def _main(input_path, output_path):
    create_output_dir(output_path)

    for image_path in list_directory(input_path):
        tweet_id = get_tweet_id_from_image_path(image_path)
        sentiment_output_path = get_sentiment_output_path(
            output_path, tweet_id)

        if os.path.exists(sentiment_output_path):
            continue

        logger.debug(f"Loading image {image_path}")
        with open(image_path, 'rb') as image_file:
            image = image_file.read()

        logger.debug(f"Requesting sentiment for {image_path}")
        extension = os.path.splitext(image_path)[1]
        response = request_image_sentiment(image, extension)

        if not response:
            logger.debug(f"Invalid sentiment for {image_path}")
            continue

        sentiment = json.loads(response.content)
        logger.debug(
            f"Writing sentiment {sentiment} to {sentiment_output_path}")
        write_sentiment(sentiment_output_path, sentiment)
def _main(input_path, output_path, period):
    logger.info(
        f"Loading Twitter dataset {input_path} and writing to {output_path}")
    logger.info(f"Period: {period}")
    create_output_dir(output_path)

    df = load_twitter_dataset(input_path)

    pipeline = [
        wrapped_partial(select_columns, columns=SELECTED_COLUMNS),
        normalize_columns,
        generate_overall_image_sentiment,
        normalize_columns,
        generate_fused_sentiment,
        normalize_columns,
        generate_overall_categories,
        wrapped_partial(aggregate_overall_sentiment, period=period),
        drop_na
    ]

    for stage in pipeline:
        logger.debug(f"Running stage {stage.__name__}")
        df = stage(df)
        logger.debug(f"Dataframe sample:\n{df}")

    logger.info(f"Writing Twitter dataset to {output_path}")
    df.to_json(output_path)
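# The pipeline loop above logs stage.__name__, which a bare functools.partial
# object would not carry. wrapped_partial is not defined in this snippet; the
# sketch below is an assumption about what it does: bind arguments while
# copying the wrapped function's metadata onto the partial.
import functools


def wrapped_partial(func, *args, **kwargs):
    # Hypothetical helper (assumed): functools.partial plus update_wrapper so
    # that attributes like __name__ survive for logging.
    partial_func = functools.partial(func, *args, **kwargs)
    functools.update_wrapper(partial_func, func)
    return partial_func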
def _main(output_path):
    create_output_dir(output_path)

    with stream_tweets() as stream:
        for tweet_json in stream.iter_lines(decode_unicode=True):
            try:
                tweet = json.loads(tweet_json)
            except Exception:
                # Skip malformed lines; otherwise tweet would be undefined or
                # stale on the next statement.
                logger.error(f"Error parsing tweet {tweet_json}")
                continue

            if tweet_qualifies_for_use(tweet):
                save_tweet(tweet, output_path)
def _main(input_path, output_path):
    create_output_dir(output_path)

    tweet_image_paths_to_download = []
    for tweet in list_tweets(input_path):
        tweet_image_url = get_tweet_image_url(tweet)
        tweet_image_path = get_tweet_image_path(tweet, output_path)
        if not os.path.exists(tweet_image_path):
            tweet_image_paths_to_download.append(
                (tweet_image_url, tweet_image_path))

    # Downloads are I/O-bound, so oversubscribe the pool relative to CPU count
    # and make sure it is cleaned up once map() returns.
    cpu_count = multiprocessing.cpu_count()
    with multiprocessing.Pool(cpu_count * 4) as pool:
        pool.map(download_tweet_image, tweet_image_paths_to_download)
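# download_tweet_image is not shown in this snippet. Because Pool.map passes a
# single argument per task, it presumably accepts one (url, path) tuple and
# unpacks it itself. A minimal sketch under that assumption, reusing
# download_with_progress from above (error handling is hypothetical):
def download_tweet_image(url_and_path):
    tweet_image_url, tweet_image_path = url_and_path
    try:
        download_with_progress(tweet_image_url, tweet_image_path)
    except Exception:
        # A single failed image should not abort the whole pool.
        logger.exception(f"Failed to download {tweet_image_url}")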
def _main(input_path, output_path, tickers):
    logger.info(f"Fetching stocks {tickers}")
    logger.info(f"Using time input range from {input_path}")
    create_output_dir(output_path)

    start_at, end_at = get_tweets_timespan(input_path)
    logger.debug(f"Stocks start time: {start_at}, end time: {end_at}")
    logger.debug("NOTE: This may be smaller than you expect due to weekends")

    stocks = [fetch_stock_history(ticker, start_at, end_at)
              for ticker in tickers]

    for ticker, stocks_df in zip(tickers, stocks):
        num_rows = len(stocks_df)
        logger.debug(f"Writing history for {ticker}: {num_rows} rows")
        write_stock_history(output_path, ticker, stocks_df)
def _main(output_path, input_dirs):
    logger.info(
        f"Loading directories {input_dirs} and writing merge to {output_path}")
    create_output_dir(output_path)

    input_dirs_lists = get_input_dirs_lists(input_dirs)
    merged_list = merge_input_dir_lists(input_dirs_lists)
    logger.debug(f"Merging and writing {list(merged_list)[:10]}")

    merged_dicts = generate_merged_dicts(input_dirs, merged_list)
    filtered_dicts = filter(None, merged_dicts)
    flattened_dicts = [
        flatten(dict_, reducer='dot') for dict_ in filtered_dicts
    ]

    df = pd.DataFrame.from_records(flattened_dicts)
    df.to_json(output_path)
    logger.info(f"Wrote merged files to {output_path}")
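# flatten above is assumed to be flatten_dict.flatten: with reducer='dot',
# nested keys are joined into dotted column names before the records are
# loaded into the DataFrame. Illustrative example (hypothetical data):
from flatten_dict import flatten

assert flatten({'sentiment': {'image': {'score': 0.4}}}, reducer='dot') == \
    {'sentiment.image.score': 0.4}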
        adjustment_position=adjustment_position)

    calibration_dir = helpers.create_calibration_dir()
    helpers.check_existing_calibration(calibration_dir, my_loadcell)

    if my_loadcell.is_calibrated is not True:
        helpers.calibrate_loadcell(my_loadcell, calibration_dir)

    helpers.start_manual_mode(my_controller,
                              my_loadcell,
                              speed=3,
                              mode_button_pin=22,
                              up_button_pin=17,
                              down_button_pin=27)

    test_parameters = helpers.read_test_parameters(test_type=result)
    output_dir = helpers.create_output_dir(test_parameters)
    helpers.save_test_parameters(my_controller, my_loadcell,
                                 test_parameters, output_dir)
    helpers.start_test(my_controller,
                       my_loadcell,
                       test_parameters,
                       output_dir=output_dir,
                       stop_button_pin=22)
elif result == 'static':
    calibration_dir = helpers.create_calibration_dir()
    helpers.check_existing_calibration(calibration_dir, my_loadcell)

    if my_loadcell.is_calibrated is not True:
        helpers.calibrate_loadcell(my_loadcell, calibration_dir)

    helpers.start_manual_mode(my_controller,
def _main(input_path, output_path):
    create_output_dir(output_path)

    files_to_analyze = get_files_to_analyze(input_path, output_path)
    analyze_tweets(files_to_analyze, output_path)
    logger.info("Entity Sentiment Analysis complete")