import dataset


def lambda_handler(params, context):
    '''Entry point for the AWS Lambda invocation; params holds the parameters passed in.'''
    urls = {}

    # arrange the local and remote paths
    path = dataset.organize_path_lambda(params)

    # save the config file
    urls['config'] = dataset.save_remote_output(path['localSavePath'],
                                                path['remoteSavePath'],
                                                'config',
                                                params)

    # prepare the input dataset
    df = dataset.get_remote_input(path['remoteReadPath'],
                                  path['filename'],
                                  path['localReadPath'])

    # execute the algorithm (defined elsewhere in this module)
    output = algorithm(df, params)

    # upload each result to the S3 bucket and collect its URL;
    # the 'uid' entry is passed through unchanged
    for key, value in output.items():
        if key != 'uid':
            urls[key] = dataset.save_remote_output(path['localSavePath'],
                                                   path['remoteSavePath'],
                                                   key,
                                                   value)
        else:
            urls[key] = value

    return urls
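# Hedged usage sketch (an assumption, not part of the original source):
# a minimal local harness for exercising lambda_handler without AWS.
# The parameter keys below mirror the Batch entry point's arguments and
# are hypothetical; the actual keys depend on what
# dataset.organize_path_lambda() expects.
if __name__ == '__main__':
    sample_params = {
        'remoteReadPath': 's3://bucket/project/twitter-Tweet/job/',  # placeholder
        'email': 'user@example.com',                                 # placeholder
        'sessionURL': 'https://example.com/session',                 # placeholder
    }
    print(lambda_handler(sample_params, context=None))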
import argparse

import dataset
from image_crawler import image_crawler as ic

if __name__ == '__main__':
    # entry point for the AWS Batch invocation
    urls = {}

    # default parameters
    parser = argparse.ArgumentParser(description="processing...")
    parser.add_argument('--remoteReadPath', required=True)
    parser.add_argument('--email', required=True)
    parser.add_argument('--sessionURL', required=True)
    params = vars(parser.parse_args())

    # arrange the local and remote paths
    path = dataset.organize_path_lambda(params)

    # prepare the input dataset
    df = dataset.get_remote_input(path['remoteReadPath'],
                                  path['filename'],
                                  path['localReadPath'])

    # collect image URLs according to the data source; membership tests
    # ensure the column check applies to every source in the group
    img_urls = []
    source = path['remoteReadPath'].split('/')[-3]
    if source in ("reddit-Search", "reddit-Post",
                  "crimson-Hexagon", "reddit-Historical-Post") \
            and 'url' in list(df.columns):
        img_urls = df['url'].dropna().tolist()
    elif source in ("twitter-Tweet", "twitter-Timeline") \
            and 'entities.media.media_url' in list(df.columns):
        img_urls = df['entities.media.media_url'].dropna().tolist()
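# Hedged usage sketch (an assumption, not part of the original source):
# how this Batch entry point would typically be launched. The script
# name and all argument values are placeholders.
#
#   python image_crawler_batch.py \
#       --remoteReadPath s3://bucket/project/reddit-Search/job/ \
#       --email user@example.com \
#       --sessionURL https://example.com/session
#
# The collected img_urls would then feed the crawler imported above as
# `ic`; that downstream step is not shown in this excerpt.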