import dataset  # local helper module for path handling and S3 I/O


def lambda_handler(params, context):
    """Entry point for the AWS Lambda invocation.

    params contains the parameters passed in by the caller.
    """
    urls = {}

    # arrange the local and remote paths
    path = dataset.organize_path_lambda(params)

    # save the config file
    urls['config'] = dataset.save_remote_output(path['localSavePath'],
                                                path['remoteSavePath'],
                                                'config',
                                                params)

    # prepare the input dataset
    df = dataset.get_remote_input(path['remoteReadPath'],
                                  path['filename'],
                                  path['localReadPath'])

    # execute the algorithm
    output = algorithm(df, params)

    # upload each output object to the S3 bucket and record its URL;
    # the uid is passed through unchanged
    for key, value in output.items():
        if key != 'uid':
            urls[key] = dataset.save_remote_output(path['localSavePath'],
                                                   path['remoteSavePath'],
                                                   key,
                                                   value)
        else:
            urls[key] = value

    return urls
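# A minimal local-invocation sketch for the handler above. The parameter
# keys below are assumptions for illustration; the exact set depends on
# what dataset.organize_path_lambda expects from the caller.
if __name__ == '__main__':
    test_params = {
        'remoteReadPath': 's3://my-bucket/user/reddit-Search/job-id/',  # hypothetical
        'email': 'user@example.com',                                    # hypothetical
        'sessionURL': 'https://example.com/session/123',                # hypothetical
    }
    # the Lambda context object is unused by this handler, so None
    # suffices for a local run
    print(lambda_handler(test_params, None))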
img_urls = []
source = path['remoteReadPath'].split('/')[-3]

# pick the column that holds image URLs, depending on the data source
if source in ("reddit-Search", "reddit-Post",
              "crimson-Hexagon", "reddit-Historical-Post") \
        and 'url' in list(df.columns):
    img_urls = df['url'].dropna().tolist()
elif source in ("twitter-Tweet", "twitter-Timeline") \
        and 'entities.media.media_url' in list(df.columns):
    img_urls = df['entities.media.media_url'].dropna().tolist()
elif source == 'flickr-Photo' and 'size.source' in list(df.columns):
    img_urls = df['size.source'].dropna().tolist()
else:
    raise ValueError("This data source does not support image collection!")

# crawl each image and save it to the local output folder
urls = {}
for img_url in img_urls:
    if ic.is_image(img_url):
        filename, binary = ic.crawler(img_url)
        dataset.save_local_output(path['localSavePath'], filename, binary)

# zip the downloaded images, upload the archive to S3, and keep its URL
urls['images.zip'] = dataset.save_remote_output(path['localSavePath'],
                                                path['remoteSavePath'],
                                                'images.zip')

# push a notification email with links to the results
notification(toaddr=params['email'], case=3,
             filename=path['remoteSavePath'],
             links=urls, sessionURL=params['sessionURL'])
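# The ic module is not shown in this excerpt. A plausible sketch of the
# two helpers it provides, assuming they validate and fetch images over
# HTTP, might look like this; the real implementation may differ.
import requests


def is_image(url):
    """Return True if the URL serves an image Content-Type."""
    try:
        resp = requests.head(url, allow_redirects=True, timeout=10)
        return resp.headers.get('Content-Type', '').startswith('image/')
    except requests.RequestException:
        return False


def crawler(url):
    """Download the image and return (filename, binary content)."""
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    filename = url.split('/')[-1] or 'image'
    return filename, resp.content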
import argparse

import dataset  # local helper module for path handling and S3 I/O

parser = argparse.ArgumentParser()
parser.add_argument('--sessionURL', required=True)

# user-specified parameters: promote any unrecognized --flags to real
# arguments, then re-parse so they land in params
parsed, unknown = parser.parse_known_args()
for arg in unknown:
    if arg.startswith("--"):
        parser.add_argument(arg, required=False)
params = vars(parser.parse_args())

urls = {}

# arrange the local and remote paths
path = dataset.organize_path_lambda(params)

# save the config file
urls['config'] = dataset.save_remote_output(path['localSavePath'],
                                            path['remoteSavePath'],
                                            'config',
                                            params)

# prepare the input dataset
df = dataset.get_remote_input(path['remoteReadPath'],
                              path['filename'],
                              path['localReadPath'])

# execute the algorithm
output = algorithm(df, params)

# upload each output object to the S3 bucket and record its URL;
# the uid is passed through unchanged
for key, value in output.items():
    if key != 'uid':
        urls[key] = dataset.save_remote_output(path['localSavePath'],
                                               path['remoteSavePath'],
                                               key,
                                               value)
    else:
        urls[key] = value
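# Illustration of the two-pass parsing used above: any --flag that was
# not pre-registered (e.g. --minWords, a hypothetical example) is
# promoted to a real argument on the second pass and ends up in params.
demo_parser = argparse.ArgumentParser()
demo_parser.add_argument('--sessionURL', required=True)

demo_argv = ['--sessionURL', 'https://example.com/s/1', '--minWords', '5']
_, demo_unknown = demo_parser.parse_known_args(demo_argv)  # ['--minWords', '5']
for arg in demo_unknown:
    if arg.startswith('--'):
        demo_parser.add_argument(arg, required=False)
print(vars(demo_parser.parse_args(demo_argv)))
# -> {'sessionURL': 'https://example.com/s/1', 'minWords': '5'}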