def output(self):
    """Build the ``MongoCellTarget`` for this task's shirt document.

    The document id is ``shirt_<date>_<luigi_loc>``, where ``<date>`` is
    ``self.date`` formatted with ``DATESTRFORMAT`` and ``<luigi_loc>`` is
    ``self.data['luigi_loc']``. The target is created with the ``shirts``
    collection name and the ``scope`` path on a client obtained from
    ``connect_db(MONGO_SERVER, MONGO_PORT)``.
    """
    from models.mongo import connect_db

    stamp = self.date.strftime(DATESTRFORMAT)
    location = self.data['luigi_loc']
    doc_id = f"shirt_{stamp}_{location}"
    client = connect_db(MONGO_SERVER, MONGO_PORT)
    return MongoCellTarget(client, MONGO_DATABASE, 'shirts', doc_id, 'scope')
def requires(self):
    """Yield a ``PostShopify`` task for each shirt that is not already posted.

    Shirts are fetched with ``find_by_luigi_at`` for ``self.date``. The
    ``crop_img`` of each shirt is compared, via ``check_same``, against the
    ``crop_img`` values of all documents in the ``shopify`` collection.
    When ``check_same`` returns a truthy value ``self.done`` is set to
    ``True``; otherwise a ``PostShopify`` task is yielded for that shirt.

    Each shirt's ``scope.luigi_at`` is replaced in place by its string form
    (``DATESTRFORMAT``) before the shirt is handed on.
    """
    from models.mongo import connect_db, find_by_luigi_at
    from utils.image_munge import check_same

    conn = connect_db()
    try:
        db = conn[MONGO_DATABASE]
        # Run both queries to completion while the connection is still open.
        # The previous version closed the connection first and queried
        # afterwards, which fails (or silently reconnects and leaks) when
        # connect_db returns a pymongo MongoClient.
        gen_shirt_lst = list(find_by_luigi_at(db['shirts'], self.date))
        img_lst = [
            img['scope']['meta']['crop_img'] for img in db['shopify'].find()
        ]
    finally:
        conn.close()

    for shirt in gen_shirt_lst:
        # datetime is not JSON serializable
        shirt['scope']['luigi_at'] = \
            shirt['scope']['luigi_at'].strftime(DATESTRFORMAT)
        if check_same(shirt['scope']['crop_img'], img_lst):
            self.done = True
        else:
            yield PostShopify(shirt=shirt)
def output(self):
    """Build the ``MongoCellTarget`` for this task's images document.

    The document id is ``images_<date>_<loc>``, where ``<date>`` is
    ``self.date`` formatted with ``DATESTRFORMAT`` and ``<loc>`` is
    ``self.loc``. The target is created with the ``images`` collection name
    and the ``scope`` path on a client obtained from
    ``connect_db(MONGO_SERVER, MONGO_PORT)``.
    """
    from models.mongo import connect_db

    stamp = self.date.strftime(DATESTRFORMAT)
    doc_id = f"images_{stamp}_{self.loc}"
    client = connect_db(MONGO_SERVER, MONGO_PORT)
    return MongoCellTarget(client, MONGO_DATABASE, 'images', doc_id, 'scope')
def output(self):
    """Build the ``MongoCellTarget`` for this task's trends document.

    The document id is the stem (file name without suffix) of the path of
    the required task's output, i.e. ``self.requires().output().path``.
    The target is created with the ``trends`` collection name and the
    ``scope`` path on a client obtained from
    ``connect_db(MONGO_SERVER, MONGO_PORT)``.
    """
    from models.mongo import connect_db

    upstream_path = self.requires().output().path
    doc_id = Path(upstream_path).stem
    client = connect_db(MONGO_SERVER, MONGO_PORT)
    return MongoCellTarget(client, MONGO_DATABASE, 'trends', doc_id, 'scope')
def output(self):
    """Build the ``MongoCellTarget`` for this shirt's shopify document.

    The document id is the shirt's ``_id`` with every occurrence of
    ``shirt`` replaced by ``shopify``. The target is created with the
    ``shopify`` collection name and the ``scope`` path on a client obtained
    from ``connect_db(MONGO_SERVER, MONGO_PORT)``.
    """
    from models.mongo import connect_db

    shirt_id = self.shirt.get('_id')
    doc_id = shirt_id.replace('shirt', 'shopify')
    # this needs to go to Mongo in next version
    client = connect_db(MONGO_SERVER, MONGO_PORT)
    return MongoCellTarget(client, MONGO_DATABASE, 'shopify', doc_id, 'scope')
def requires(self):
    """Yield one ``CropImage`` task per tweet stored for this date and loc.

    Looks up the document whose id is ``img_tweets_<date>_<loc>`` in the
    ``tweets`` collection (``<date>`` is ``self.date`` formatted with
    ``DATESTRFORMAT``) and yields ``CropImage(loc, date, tweet)`` for every
    entry of ``doc['scope']['tweets']``.
    """
    from models.mongo import connect_db, get_collection, find_by_id

    client = connect_db()
    try:
        col = get_collection(client, 'tweets', db=MONGO_DATABASE)
        query = f"img_tweets_{self.date.strftime(DATESTRFORMAT)}_{self.loc}"
        doc = find_by_id(col, query)
    finally:
        # Always release the connection, even when the lookup raises.
        client.close()

    for tw in doc['scope']['tweets']:
        yield CropImage(loc=self.loc, date=self.date, tweet=tw)
def tweet_data():
    """Render ``tweets.html`` with the data read from the ``tweets`` collection.

    The data is whatever ``retrieve_all_data`` returns for that collection;
    it is passed to the template as ``data`` together with the fixed
    ``header`` and ``subheader`` strings.
    """
    conn = connect_db()
    try:
        tweets = get_collection(conn, 'tweets')
        tweets_data = retrieve_all_data(tweets)
    finally:
        # Always release the connection, even when the read raises.
        conn.close()

    return render_template(
        'tweets.html',
        header='Tweets',
        subheader='Data',
        data=tweets_data
    )
def trend_data():
    """Render ``trends.html`` with the data read from the ``trends`` collection.

    The data is whatever ``retrieve_all_data`` returns for that collection;
    it is passed to the template as ``data`` together with the fixed
    ``header`` and ``subheader`` strings.
    """
    conn = connect_db()
    try:
        trends = get_collection(conn, 'trends')
        trends_data = retrieve_all_data(trends)
    finally:
        # Always release the connection, even when the read raises.
        conn.close()

    return render_template(
        'trends.html',
        header='Trends',
        subheader='Data',
        data=trends_data
    )
def requires(self):
    """Delete files under ``static/images`` that no shopify document uses.

    Collects the ``scope.meta.crop_img`` value of every document in the
    ``shopify`` collection, lists everything matching
    ``<cwd>/static/images/*`` and removes each listed file whose path is not
    among the collected values. Returns ``None``.
    """
    from models.mongo import connect_db

    conn = connect_db()
    try:
        # NOTE(review): the database name is hard-coded here; confirm
        # whether it is meant to be the same database as MONGO_DATABASE.
        shopify = conn['viral-tees']['shopify']
        # keep these for image comparison on ec2
        keep = {x['scope']['meta']['crop_img'] for x in shopify.find()}
    finally:
        # The connection was previously never closed in this method.
        conn.close()

    # NOTE(review): the comparison is by exact string, so it assumes
    # crop_img is stored in the same form glob returns here — confirm.
    imgs = glob.glob(os.getcwd() + '/static/images/*')
    for path in set(imgs) - keep:
        os.remove(path)
def retrieve(date):
    """Collect the trends, trimmed and images documents found for ``date``.

    Args:
        date: Value passed to ``find_by_luigi_at`` as the lookup key.

    Returns:
        A dict with the keys ``'trends'``, ``'trimmed'`` and ``'images'``,
        each holding a list of what ``find_by_luigi_at`` produced for the
        collection of the same name.
    """
    conn = connect_db()
    try:
        db = conn[MONGO_DATABASE]
        # get from 3 collections; list() drains each result while the
        # connection is still open
        trends = list(find_by_luigi_at(db['trends'], date))
        trimmed = list(find_by_luigi_at(db['trimmed'], date))
        images = list(find_by_luigi_at(db['images'], date))
    finally:
        # Always release the connection, even when a query raises.
        conn.close()

    return {
        'trends': trends,
        'trimmed': trimmed,
        'images': images
    }
def generate_unique_trends(data):
    """Return the trend names in ``data`` not already recorded as trimmed.

    Args:
        data: Mapping with a ``'luigi_at'`` value and a ``'trends'``
            iterable whose items each have a ``'name'`` key.

    Returns:
        A list of the names from ``data['trends']``, in their original
        order, for which ``name in sublist`` is false for every
        ``scope.luigi_all_trend_list`` found in the ``trimmed`` collection
        for ``data['luigi_at']``.
    """
    luigi_at = data['luigi_at']
    current_trends = [x['name'] for x in data['trends']]

    conn = connect_db()
    try:
        trimmed = get_collection(conn, col='trimmed', db=MONGO_DATABASE)
        # Materialize the stored trend lists while the connection is open.
        check_trend_list = [
            x['scope']['luigi_all_trend_list']
            for x in find_by_luigi_at(trimmed, luigi_at)
        ]
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    return [
        trend for trend in current_trends
        if not any(trend in sublist for sublist in check_trend_list)
    ]