def run():
    banner = r"""
$$$$$$$$\ $$$$$$$$\  $$$$$$\  $$\        $$$$$$\
\__$$  __|$$  _____|$$  __$$\ $$ |      $$  __$$\
   $$ |   $$ |      $$ /  \__|$$ |      $$ /  $$ |
   $$ |   $$$$$\    \$$$$$$\  $$ |      $$$$$$$$ |
   $$ |   $$  __|    \____$$\ $$ |      $$  __$$ |
   $$ |   $$ |      $$\   $$ |$$ |      $$ |  $$ |
   $$ |   $$$$$$$$\ \$$$$$$  |$$$$$$$$\ $$ |  $$ |
   \__|   \________| \______/ \________|\__|  \__|
"""
    print(banner)
    print('Welcome to the Tesla Image Processing and Computer Vision contest!')

    # metadata.json is produced by register.py; refuse to run without it.
    if not os.path.exists(os.path.join(os.getcwd(), 'metadata.json')):
        print('-----------------------------------------------')
        print('Please register before submitting your results.')
        print('You can register by running: python register.py')
        print('-----------------------------------------------')
        sys.exit(0)

    with open('metadata.json', 'r') as f:
        metadata = json.load(f)

    print('----------------------------')
    print('Evaluating your submissions:')
    print('----------------------------')

    key_part_a = 33  # first half of the offset used to decode Question 5's answer key
    scores = []

    # Question 1: salt-and-pepper removal, scored by MSE against the decoded answer.
    answers, image_paths = load_dirs(1)
    total = 0
    for case, (image_path, answer) in enumerate(zip(image_paths, answers), 1):
        expected = decode_answer(answer)
        image = cv2.imread(image_path, 1)
        try:
            image = remove_salt_and_pepper(image)
            score = evaluate_mse(image, expected)
        except Exception:
            print('Looks like something is wrong with Question 1 submission -- '
                  'Score of 0 is provided for test case ' + str(case))
            score = 0
        # Cases 1-3 carry weight 1 and cases 4+ carry weight 2, so six cases
        # give a weight sum of 9 -- hence the /9 normalization below.
        total += score * (1 + (case > 3))
    total = (total / 9) * 30
    scores.append(total)
    print('Question 1:', total)

    # Question 2: general denoising, scored the same way.
    answers, image_paths = load_dirs(2)
    total = 0
    for case, (image_path, answer) in enumerate(zip(image_paths, answers), 1):
        expected = decode_answer(answer)
        image = cv2.imread(image_path, 1)
        try:
            image = remove_noise(image)
            score = evaluate_mse(image, expected)
        except Exception:
            print('Looks like something is wrong with Question 2 submission -- '
                  'Score of 0 is provided for test case ' + str(case))
            score = 0
        total += score * (1 + (case > 3))
    total = (total / 9) * 30
    scores.append(total)
    print('Question 2:', total)

    key_part_b = 127  # second half of the Question 5 offset

    # Question 3: red-channel thresholding, scored as a binary mask.
    answers, image_paths = load_dirs(3)
    total = 0
    for case, (image_path, answer) in enumerate(zip(image_paths, answers), 1):
        expected = cv2.cvtColor(decode_answer(answer), cv2.COLOR_BGR2GRAY)
        expected = expected / 255
        image = cv2.imread(image_path, 1)
        try:
            mask = threshold_red(image) / 255
            score = evaluate_mask(mask, expected)
        except Exception:
            print('Looks like something is wrong with Question 3 submission -- '
                  'Score of 0 is provided for test case ' + str(case))
            score = 0
        total += score * (1 + (case > 3))
    total = (total / 9) * 30
    scores.append(total)
    print('Question 3:', total)

    # Question 4: histogram equalization, scored by histogram similarity.
    answers, image_paths = load_dirs(4)
    total = 0
    for case, (image_path, answer) in enumerate(zip(image_paths, answers), 1):
        expected = cv2.cvtColor(decode_answer(answer), cv2.COLOR_BGR2GRAY)
        image = cv2.imread(image_path, 0)
        try:
            image = histogram_equalization(image)
            score = evaluate_histogram(image, expected)
        except Exception:
            print('Looks like something is wrong with Question 4 submission -- '
                  'Score of 0 is provided for test case ' + str(case))
            score = 0
        total += score * (1 + (case > 3))
    total = (total / 9) * 30
    scores.append(total)
    print('Question 4:', total)

    # Question 5: people counting. The expected counts live in count.txt as
    # '/'-separated hex values, shifted by key_part_a + key_part_b + 73 = 233.
    _, image_paths = load_dirs(5)
    offset = key_part_a + key_part_b + 73
    with open(os.path.join(os.getcwd(), 'eval', 'Question 5', 'count.txt'), 'r') as f:
        tokens = f.read().strip().split('/')
    counts = [int(token, 16) - offset for token in tokens]
    total = 0
    # Note: `answers` still holds Question 4's files at this point; the zip
    # only bounds the iteration -- the expected value comes from `counts`.
    for case, (image_path, _answer) in enumerate(zip(image_paths, answers), 1):
        expected = counts[case - 1]
        image = cv2.imread(image_path, 0)
        try:
            count = count_people(image)
            score = evaluate_count(count, expected)
        except Exception:
            print('Looks like something is wrong with Question 5 submission -- '
                  'Score of 0 is provided for test case ' + str(case))
            score = 0
        total += score * (1 + (case > 3))
    total = (total / 9) * 30
    scores.append(total)
    print('Question 5:', total)

    print('-------------------')
    print('Submitting Results:')
    print('-------------------')
    print('You can submit results any number of times you want to.')
    print('We will only consider the final submission regardless of its score.')
    choice = input('Would you like to submit your scores? (Y/N) ')
    print('--------------------------------------------')
    if choice == 'y' or choice == 'Y':
        print('Submitting...')
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'ENTER YOUR GOOGLE CREDS HERE'
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': 'ENTER YOUR FIREBASE PROJECT ID HERE',
        })
        db = firestore.client()
        doc_ref = db.collection('submissions').document(metadata['reg_num'])
        doc_ref.set({
            'question1': scores[0],
            'question2': scores[1],
            'question3': scores[2],
            'question4': scores[3],
            'question5': scores[4],
            'name': metadata['name'],
            'id': metadata['id'],
            'dept': metadata['dept'],
            'year': metadata['year'],
        })
        print('Done!')
    else:
        print('Not Submitted.')
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

credential_app = credentials.ApplicationDefault()
firebase_admin.initialize_app(credential_app, {
    'projectId': 'todo-list-flask',
})
db = firestore.client()


def get_users():
    return db.collection('users').get()


def get_user_by_id(user_id):
    return db.collection('users')\
        .document(user_id).get()


def insert_user(user_data):
    data = {
        'password': user_data.password,
    }
    db.collection('users').document(user_data.username).set(data)


def get_tasks(user_id):
    return db.collection('users')\
        .document(user_id)\
def get(self):
    return credentials.ApplicationDefault()
def Bigbascket_scraper():
    # headless chrome
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--no-sandbox")

    # for cloud
    chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
    driver = webdriver.Chrome(
        executable_path=os.environ.get("CHROMEDRIVER_PATH"),
        chrome_options=chrome_options)
    # for local machine
    #chromedriver = 'E://chromedriver.exe'
    #driver = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options)

    url_scrap()
    url_selected = pd.read_csv(r'bb_selected_link.csv')
    product_url = url_selected['link'].tolist()
    p_cat = url_selected['category_name'].tolist()
    product_url_try = product_url[:2]

    bb_df = pd.DataFrame(columns=[
        'Product_category', 'Product_name', 'O_price', 'D_price',
        'product_quantity', 'Brand_name'
    ])

    for url in product_url_try:
        driver.get(url)
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight/3.5);"
            "window.scrollTo(0, document.body.scrollHeight/3.7);")
        time.sleep(30)  # wait 30 sec for the page to finish rendering
        html = driver.execute_script("return document.documentElement.outerHTML")
        soup = bs4(html, 'html.parser')
        # print(soup)
        print(url)

        br_name = soup.findAll("h6", {"ng-bind": "vm.selectedProduct.p_brand"})
        pr_name = soup.findAll("a", {"ng-bind": "vm.selectedProduct.p_desc"})
        or_price = soup.findAll("span", {"class": "mp-price ng-scope"})
        disc_price = soup.findAll("span", {"class": "discnt-price"})
        p_qty = soup.findAll("div", {"class": "col-sm-12 col-xs-7 qnty-selection"})
        #p_cat = soup.findAll("div", {"class": "dp_headding"})

        b_name = []
        p_name = []
        o_price = []
        d_price = []
        qty = []
        p_c = []
        for bpoints, ppoints, opoints, dpoints, qpoints, pcatpoints in zip(
                br_name, pr_name, or_price, disc_price, p_qty, p_cat):
            b_name.append(str(bpoints.text))
            p_name.append(str(ppoints.text))
            o_price.append(str(opoints.text))
            d_price.append(str(dpoints.text))
            qty.append(str(qpoints.text))
            p_c.append(str(pcatpoints))

        # create dictionary
        dictionary = {}
        keys = [
            'Product_category', 'Product_name', 'O_price', 'D_price',
            'product_quantity', 'Brand_name'
        ]
        values = [p_c, p_name, o_price, d_price, qty, b_name]
        bigbasket_df_al = dict(zip(keys, values))
        df = pd.DataFrame(
            dict([(k, pd.Series(v)) for k, v in bigbasket_df_al.items()]))
        result = bb_df.append(df)
        bb_df = result
        print(bb_df)

    #bb_df.to_csv(r'bb_scaped_data_30nov.csv', index=False)
    #df = pd.read_csv('bb_scaped_data_30nov.csv')
    df_copy = bb_df

    # convert all columns to lower case
    df_copy[df_copy.columns] = df_copy.apply(
        lambda x: x.astype(str).str.lower())
    # replace new line with space
    df_copy = df_copy.replace('\n', '', regex=True)

    df_copy['weight'] = df_copy['product_quantity'].str.split('-').str[0]
    df_copy['weight'] = df_copy['weight'].str.strip()
    df_copy['weight'] = df_copy['weight'].replace({'x': '#'}, regex=True)
    df_copy = df_copy[~df_copy['weight'].str.contains('\#')]
    df_copy = df_copy[~df_copy['weight'].str.contains('combo|pack of')]
    df_copy = df_copy[~df_copy['weight'].str.contains('\(')]

    df_copy['scale'] = df_copy['weight'].astype(str).str.split(' ').str[1]
    df_copy['scale'] = df_copy['scale'].replace(
        {'pcs|pellets|sachets|pouch|cup': 'pc'}, regex=True)
    df_copy['scale'] = df_copy['scale'].replace({'gm': 'g'}, regex=True)
    df_copy['scale'] = df_copy['scale'].replace({'lt|ltr': 'l'}, regex=True)

    df_copy['App_brand'] = df_copy['Brand_name']
    # note: pd.np was removed in pandas >= 1.0; on newer pandas import numpy
    # and use np.where instead.
    df_copy["Organic_flag"] = pd.np.where(
        df_copy['Product_name'].str.contains('organic'), 'organic',
        'non_organic')
    df_copy['O_price'] = df_copy.O_price.str.extract('(\d+)')
    df_copy['D_price'] = df_copy.D_price.str.extract('(\d+)')
    df_copy['Weight'] = df_copy.product_quantity.str.extract('(\d+)')
    df_copy['Unit'] = df_copy['scale']
    df_copy['Website'] = 'BigBascket'

    prod_name_list_dmart = df_copy["Product_name"].tolist()

    # Reading the standard ingredient list
    df_col = pd.read_excel("check.xlsx", sheet_name='Details')
    # lowering the data to maintain uniformity of the dataset
    df_col = df_col.apply(lambda x: x.astype(str).str.lower())
    # converting data columns to lists for further processing
    sorted_name_list = df_col["Sorted Name"].tolist()
    product_category_list = df_col["Product_Category"].tolist()

    app_name = []
    for j in prod_name_list_dmart:
        list1 = []
        for i in sorted_name_list:
            try:
                if i in j:
                    list1.append(i)
            except Exception:
                list1 = []
        app_name.append(list1)

    # fill [] with null keyword
    index = [i for i, x in enumerate(app_name) if x == []]
    for i in range(len(app_name)):
        for j in index:
            if i == j:
                app_name[i] = 'Null'

    # take max length words from list
    app_name1 = []
    for name in app_name:
        name1 = max((name for name in name if name), key=len)
        app_name1.append(name1)
    df_copy['App_name'] = app_name1

    app_name_dmart = df_copy["App_name"].tolist()
    app_cat = []
    for i in range(len(app_name_dmart)):
        list2 = []
        for j in range(len(sorted_name_list)):
            try:
                if app_name_dmart[i] == sorted_name_list[j]:
                    list2.append(product_category_list[j])
            except Exception:
                list2 = []
        app_cat.append(list2)

    # fill [] with null keyword
    index = [i for i, x in enumerate(app_cat) if x == []]
    for i in range(len(app_cat)):
        for j in index:
            if i == j:
                app_cat[i] = 'Null'

    # take max length words from list
    app_cat1 = []
    for name in app_cat:
        name1 = max((name for name in name if name), key=len)
        app_cat1.append(name1)
    df_copy['App_category'] = app_cat1

    cols = df_copy.select_dtypes(['object']).columns
    df_copy[cols] = df_copy[cols].apply(lambda x: x.str.strip())
    df_copy[cols] = df_copy[cols].apply(lambda x: x.str.capitalize())

    df_copy.to_csv(r'bb_30nov_all.csv', index=False)
    df_bb = df_copy[[
        'Product_name', 'App_brand', 'O_price', 'D_price', 'Weight', 'Unit',
        'App_name', 'Organic_flag', 'App_category', 'Website'
    ]]
    df_bb.to_csv(r'bb_30_nov.csv', index=False)
    print("bb_30nov.csv created")

    obj = df_bb.to_json(orient="split")
    parsed = json.loads(obj)
    data = json.dumps(parsed, indent=4)

    # firebase
    firebase_app = None
    PROJECT_ID = 'webdatascaper'
    IS_EXTERNAL_PLATFORM = True  # False if using Cloud Functions
    #if firebase_app:
    #    return firebase_app
    import firebase_admin
    from firebase_admin import credentials
    if IS_EXTERNAL_PLATFORM:
        cred = credentials.Certificate(
            'webdatascaper-firebase-adminsdk-r9721-6539dd32b9.json')
    else:
        cred = credentials.ApplicationDefault()
    firebase_app = firebase_admin.initialize_app(
        cred, {'storageBucket': f"{PROJECT_ID}.appspot.com"})

    name = '/bigbascket/bb_1_dec.json'
    bucket = storage.bucket()
    blob = bucket.blob(name)
    blob.upload_from_string(json.dumps(data, indent=2))
    data = json.loads(blob.download_as_string())
    print("file Uploaded")
def garmin_push(request):
    if len(firebase_admin._apps) == 0:
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': config.PROJECT_ID,
        })
    db = firestore.client()

    # log some stuff (remove this code later as will have some "secrets" in it)
    logging.info(firebase_admin._apps)
    logging.info(utils.get_flask_request(request))

    # Extract the metadata from the POST form
    upload_metadata = json.loads(request.form['uploadMetaData'])
    upload_id = upload_metadata['uploadId']
    activity_ids = upload_metadata['activityIds']
    oauth_token = upload_metadata['oauthToken']
    md5 = upload_metadata['md5']

    # Verify the token is valid from Firestore and get the user id
    oauth_token_ref = db.collection(
        config.GARMIN_ACCESS_TOKENS_COLLECTION).document(oauth_token)
    doc = oauth_token_ref.get()
    if not doc.exists:
        logging.error('Unknown access token: {}'.format(
            utils.obfuscate_secret(oauth_token)))
        return access_token_expired_response()
    oauth_token_dict = doc.to_dict()
    if not oauth_token_dict['active']:
        logging.info(
            'Access token expired trying to upload activity: {} {}'.format(
                utils.obfuscate_secret(oauth_token), activity_ids))
        return access_token_expired_response()

    # Update usages count in transaction
    increment_usages_in_transaction(db.transaction(), oauth_token_ref)

    # Check a file was uploaded
    if 'file' not in request.files:
        print('No files!')
        return retry_response()

    # Check it is of the right type
    file = request.files['file']
    header = file.read(72)
    file_type = identify_garmin_file_type(header)
    if not file_type:
        print('File type not recognized')
        return retry_response()
    file.seek(0)

    # Not sure this should ever fail if we've got this far
    if file.filename == '':
        print('No selected file')
        return retry_response()

    # Store the file details in the RDS DB
    # so details are there when parsing is triggered

    # Upload the file
    filename = '{}.{}'.format(upload_metadata['uploadId'], file_type)
    storage = GCPStorage()
    upload_file_properties = storage.upload_file(config.UPLOAD_BUCKET,
                                                 config.GARMIN_FOLDER,
                                                 filename, file,
                                                 file.content_type)
    if not upload_file_properties.success:
        return retry_response()

    return '', 201, {'Location': upload_file_properties.url}
def _get_access_token():
    # Find and apply application default credentials
    cred = credentials.ApplicationDefault()
    access_token_info = cred.get_access_token()
    return access_token_info.access_token
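The token returned above is a plain OAuth2 bearer token, so it can authorize raw REST calls. A minimal sketch, assuming the `requests` package; the endpoint below is the standard Firestore REST URL with placeholder project and document names, not something from the snippet:

import requests


def fetch_document(project_id, doc_path):
    # Firestore REST endpoint; project_id and doc_path are placeholders.
    url = ('https://firestore.googleapis.com/v1/projects/%s/'
           'databases/(default)/documents/%s' % (project_id, doc_path))
    resp = requests.get(
        url, headers={'Authorization': 'Bearer ' + _get_access_token()})
    resp.raise_for_status()
    return resp.json()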
def main(event, context):
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {'projectId': PROJECT_NAME})
    db: Client = firestore.client()

    query_date = _get_query_date()
    price_ref = db.collection('stock_price')
    prices: [DocumentSnapshot] = price_ref.where('date', '>=', query_date).get()

    def build_data(price: DocumentSnapshot) -> dict:
        data = price.to_dict()
        return {
            'date': data['date'],
            'code': data['code'],
            'opening_quotation': data['opening_quotation'],
            'high': data['high'],
            'turnover': data['turnover'],
            'closing_quotation': data['closing_quotation'],
            'low': data['low']
        }

    stock_prices = list(map(build_data, prices))

    predictable_stock_codes = []
    data_sets = []
    for code in range(1001, 9999):
        filtered = list(filter(lambda x: x['code'] == code, stock_prices))
        if len(filtered) == 0:
            continue
        data = sorted(filtered, key=lambda x: x['date'])

        def build_dataset(d: dict) -> list:
            return [
                d['opening_quotation'], d['high'], d['turnover'],
                d['closing_quotation'], d['low']
            ]

        data_sets.append(list(map(build_dataset, data)))
        predictable_stock_codes.append(data[0]['code'])

    input = list(map(lambda d: {'input': d}, data_sets))

    ml = discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(PROJECT_NAME, 'stock_predictor')
    name += '/versions/{}'.format('stock_price_predictor')

    response = ml.projects().predict(name=name, body={
        'instances': input
    }).execute()

    if 'error' in response:
        raise RuntimeError(response['error'])

    print(response['predictions'])
    prediction_results = response['predictions']

    # TODO save data at next day or week day
    l = list(filter(lambda x: x['code'] == code, stock_prices))
    # FIXME get from now date
    s = sorted(l, key=lambda x: x['date'])
    print(s)
    predict_datetime = from_rfc3339(
        s[-1]['date'].rfc3339())  # Get latest date in predicted dataset
    original_datetime = predict_datetime
    return

    # NOTE: unreachable -- the early return above exits first
    while True:
        delta = timedelta(days=1)
        predict_datetime = predict_datetime + delta
        # Skip Japan holidays and JPX holidays (Dec 31, Jun 1, 2 and 3)
        if not _is_jpx_holiday(predict_datetime):
            break
def __init__(self, root_path, gcp_project):
    self.credential = credentials.ApplicationDefault()
    firebase_admin.initialize_app(self.credential,
                                  {'projectId': gcp_project})
    self.client = firestore.client()
    self.root_path = root_path
def __init__(self, project_id, *args, **kwargs):
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {
        'projectId': project_id,
    })
    self.db = firestore.client()
def test_nonexisting_path(self, app_default):  # pylint: disable=unused-argument
    with pytest.raises(IOError):
        credentials.ApplicationDefault()
def firestore_login():
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {
        'projectId': 'coffee-io-k8s',
    })
    return firestore.client()
def initialize_DB():
    if not firebase_admin._apps:
        firebase_admin.initialize_app(credentials.ApplicationDefault(), {
            'projectId': 'step-project-ellispis',
        })
    return firestore.client()
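Several snippets here repeat this `firebase_admin._apps` guard, since `initialize_app` raises if the default app already exists. A minimal consolidated sketch of the pattern, with the project id turned into a parameter:

import firebase_admin
from firebase_admin import credentials, firestore


def get_firestore(project_id):
    # Only initialize the default app once; later calls just return a client.
    if not firebase_admin._apps:
        firebase_admin.initialize_app(credentials.ApplicationDefault(),
                                      {'projectId': project_id})
    return firestore.client()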
def download_dataset(self):
    """
    This method loops through the firestore document database using
    paginated queries based on the document id. It filters out documents
    where `target != guess` if `self.target_eq_guess` is True and saves
    the audio file and target text into separate files.
    """
    PROJECT_ID = 'speak-v2-2a1f1'
    QUERY_LIMIT = 2000       # max size of query
    SAMPLES_PER_QUERY = 200  # number of random samples downloaded per query
    AUDIO_EXT = '.m4a'       # extension of downloaded audio

    audio_dir = os.path.join(self.output_dir, "audio")
    os.makedirs(audio_dir, exist_ok=True)

    # verify and set the credentials
    CREDENTIAL_PATH = "/home/dzubke/awni_speech/speak-v2-2a1f1-d8fc553a3437.json"
    assert os.path.exists(
        CREDENTIAL_PATH
    ), "Credential file does not exist or is in the wrong location."
    # set the environment variable that `firebase_admin.credentials` will use
    os.putenv("GOOGLE_APPLICATION_CREDENTIALS", CREDENTIAL_PATH)

    # initialize the credentials and firebase db client
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {'projectId': PROJECT_ID})
    db = firestore.client()

    # create the data-label path and initialize the tsv headers
    date = datetime.date.today().isoformat()
    self.data_label_path = os.path.join(self.output_dir,
                                        "eval2-v4_data_" + date + ".tsv")
    self.metadata_path = os.path.join(self.output_dir,
                                      "eval2-v4_metadata_" + date + ".json")

    # re-calculate the constraints in the `config` as integer counts based
    # on the `dataset_size`
    self.constraints = {
        name: int(self.constraints[name] * self.num_examples)
        for name in self.constraints.keys()
    }
    # constraint_names will help to ensure the dict keys created later are consistent.
    constraint_names = list(self.constraints.keys())
    print("constraints: ", self.constraints)

    # id_counter keeps track of the counts for each speaker, lesson, and line ids
    id_counter = {name: dict() for name in constraint_names}

    # create a mapping from record_id to lesson, line, and speaker ids
    disjoint_ids_map = get_record_ids_map(metadata_path, constraint_names)

    # create a dict of sets of all the ids in the disjoint datasets that will not
    # be included in the filtered dataset
    disjoint_id_sets = {name: set() for name in self.disjoint_id_names}
    for disj_dataset_path in self.disjoint_datasets:
        disj_dataset = read_data_json(disj_dataset_path)
        # extracts the record_ids from the excluded datasets
        record_ids = [path_to_id(example['audio']) for example in disj_dataset]
        # loop through each record id
        for record_id in record_ids:
            # loop through each id_name and update the disjoint_id_sets
            for disjoint_id_name, disjoint_id_set in disjoint_id_sets.items():
                disjoint_id_set.add(disjoint_ids_map[record_id][disjoint_id_name])

    # creating a date range from `self.days_from_today` in the correct format
    now = datetime.datetime.utcnow()
    day_delta = datetime.timedelta(days=self.days_from_today)
    day_range = now - day_delta
    day_range = day_range.isoformat("T") + "Z"

    with open(self.data_label_path, 'w', newline='\n') as tsv_file:
        tsv_writer = csv.writer(tsv_file, delimiter='\t')
        header = [
            "id", "target", "guess", "lessonId", "target_sentence", "lineId",
            "uid", "redWords_score", "date"
        ]
        tsv_writer.writerow(header)

        # create the first query based on the constant QUERY_LIMIT
        rec_ref = db.collection(u'recordings')
        # this is the final record_id that was downloaded from the speak training set
        speak_train_last_id = 'SR9TIlF8bSWApZa1tqEBIHOQs5z1-1583920255'
        next_query = rec_ref\
            .order_by(u'id')\
            .start_after({u'id': speak_train_last_id})\
            .limit(QUERY_LIMIT)

        # loop through the queries until the example_count is at least the num_examples
        example_count = 0
        # get the ids from the training and testsets to ensure the downloaded set is disjoint
        train_test_set = self.get_train_test_ids()

        while example_count < self.num_examples:
            print(f"another loop with {example_count} examples written")
            # convert the generator to a list to retrieve the last doc_id
            docs = list(map(lambda x: self._doc_trim_to_dict(x),
                            next_query.stream()))
            try:
                # this id will be used to start the next query
                last_id = docs[-1]['id']
            # if the docs list is empty, there are no new documents
            # and an IndexError will be raised and break the while loop
            except IndexError:
                print("Exiting while loop")
                break

            # selects a random sample of `SAMPLES_PER_QUERY` from the total queries
            #docs = random.sample(docs, SAMPLES_PER_QUERY)

            for doc in docs:
                # if num_examples is reached, break
                if example_count >= self.num_examples:
                    break

                target = process_text(doc['info']['target'])
                # check that the speaker, target-sentence, and record_id are disjoint
                if doc['user']['uid'] not in disjoint_id_sets['speaker']\
                        and target not in disjoint_id_sets['target_sentence']\
                        and doc['id'] not in train_test_set:

                    # set `self.target_eq_guess` to True in `init` if you want
                    # to filter by `target` == `guess`
                    if self.target_eq_guess:
                        # process the target and guess and remove apostrophes for comparison
                        guess = process_text(doc['result']['guess'])
                        target_no_apostrophe = target.replace("'", "")
                        guess_no_apostrophe = guess.replace("'", "")
                        # if targ != guess, skip the record
                        if target_no_apostrophe != guess_no_apostrophe:
                            continue

                    # if `True`, constraints on the records downloaded will be checked
                    if self.check_constraints:
                        # create a mapping to feed into `check_update_constraints`
                        record_ids_map = {
                            doc['id']: {
                                'lesson': doc['info']['lessonId'],
                                'target_sentence': target,  # using processed target as id
                                'speaker': doc['user']['uid']
                            }
                        }
                        pass_constraint = check_update_contraints(
                            doc['id'], record_ids_map, id_counter,
                            self.constraints)
                        # if the record doesn't pass the constraints, continue to the next record
                        if not pass_constraint:
                            continue

                    # save the audio file from the link in the document
                    audio_url = doc['result']['audioDownloadUrl']
                    audio_path = os.path.join(audio_dir, doc['id'] + AUDIO_EXT)

                    # convert the downloaded file to .wav format
                    # usually, this conversion is done in the preprocessing step,
                    # but some eval sets don't need PER labels, and so this
                    # removes the need to preprocess the evalset.
                    base, raw_ext = os.path.splitext(audio_path)
                    # use the `.wav` extension for the converted file
                    wav_path = base + os.path.extsep + "wav"
                    # if the wave file doesn't exist, convert to wav
                    if not os.path.exists(wav_path):
                        try:
                            to_wave(audio_path, wav_path)
                        except subprocess.CalledProcessError:
                            # if the file can't be converted, skip it by continuing
                            logging.info(
                                f"Process Error converting file: {audio_path}")
                            continue

                    # save the target in a tsv row
                    # tsv header: "id", "target", "guess", "lessonId",
                    # "target_id", "lineId", "uid", "date"
                    tsv_row = [
                        doc['id'],
                        doc['info']['target'],
                        doc['result']['guess'],
                        doc['info']['lessonId'],
                        target,  # using this to replace lineId
                        doc['info']['lineId'],
                        doc['user']['uid'],
                        doc['result']['score'],
                        doc['info']['date']
                    ]
                    tsv_writer.writerow(tsv_row)

                    # save all the metadata in a separate file
                    #with open(self.metadata_path, 'a') as jsonfile:
                    #    json.dump(doc, jsonfile)
                    #    jsonfile.write("\n")

                    example_count += 1

            # create the next query starting after the last_id
            next_query = (rec_ref
                          .order_by(u'id')
                          .start_after({u'id': last_id})
                          .limit(QUERY_LIMIT))
def download_dataset(self):
    """
    This method loops through the firestore document database using
    paginated queries based on the document id. It filters out documents
    where `target != guess` and saves the audio file and target text
    into separate files. The approach to index the queries based on the
    document `id` is based on the approach outlined here:
    https://firebase.google.com/docs/firestore/query-data/query-cursors#paginate_a_query
    """
    PROJECT_ID = 'speak-v2-2a1f1'
    QUERY_LIMIT = 2500
    NUM_PROC = 50
    AUDIO_EXT = ".m4a"

    # verify and set the credentials
    CREDENTIAL_PATH = "/home/dzubke/awni_speech/speak-v2-2a1f1-d8fc553a3437.json"
    assert os.path.exists(
        CREDENTIAL_PATH
    ), "Credential file does not exist or is in the wrong location."
    # set the environment variable that `firebase_admin.credentials` will use
    os.putenv("GOOGLE_APPLICATION_CREDENTIALS", CREDENTIAL_PATH)

    # create the data-label path and initialize the tsv headers
    audio_dir = os.path.join(self.output_dir, "audio")
    today = datetime.date.today().isoformat()
    metadata_path = os.path.join(self.output_dir,
                                 f"{self.metadata_fname}_{today}.tsv")

    # initialize the credentials and firebase db client
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {'projectId': PROJECT_ID})
    db = firestore.client()

    # create the first query based on the constant QUERY_LIMIT
    rec_ref = db.collection(u'recordings')

    # if `last_id` is not set, start the query from the beginning
    if self.last_id is None:
        next_query = rec_ref.order_by(u'id').limit(QUERY_LIMIT)
        # write a header to a new file
        with open(metadata_path, 'w', newline='\n') as tsv_file:
            tsv_writer = csv.writer(tsv_file, delimiter='\t')
            header = [
                "id", "target", "lessonId", "lineId", "uid",
                "redWords_score", "date"
            ]
            # add the audio url if not downloading audio
            if not self.download_audio:
                header.append("audio_url")
            tsv_writer.writerow(header)
    # or begin where a previous run left off at `self.last_id`
    else:
        next_query = (rec_ref.order_by(u'id').start_after({
            u'id': self.last_id
        }).limit(QUERY_LIMIT))

    start_time = time.time()
    query_count = 0

    # these two lines can be used for debugging by only looping a few times
    #loop_iterations = 5
    #while loop_iterations > 0:

    # loops until break is called in the try-except block
    while True:
        # converting the generator to a list so it can be referenced multiple
        # times; the documents are converted with to_dict so they can be
        # pickled in multiprocessing
        docs = list(map(lambda x: self._doc_trim_to_dict(x), next_query.stream()))

        try:
            # this `id` will be used to start the next query
            last_id = docs[-1][u'id']
        # if the docs list is empty, there are no new documents
        # and an IndexError will be raised and break the while loop
        except IndexError:
            break

        # fill in the keyword arguments of the multiprocess download function
        mp_function = functools.partial(self.multiprocess_download,
                                        audio_dir=audio_dir,
                                        metadata_path=metadata_path,
                                        audio_ext=AUDIO_EXT)

        #self.singleprocess_record(docs)
        pool = Pool(processes=NUM_PROC)
        results = pool.imap_unordered(mp_function, docs, chunksize=1)
        pool.close()
        pool.join()

        # print the last_id so the script can pick up from it if something breaks
        query_count += QUERY_LIMIT
        print(f"last_id: {last_id} at count: {query_count}")
        print(f"script duration: {round((time.time() - start_time) / 60, 2)} min")

        # create the next query starting after the last_id
        next_query = (rec_ref.order_by(u'id').start_after({
            u'id': last_id
        }).limit(QUERY_LIMIT))
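Distilled from the method above, the cursor-pagination pattern it relies on (per the linked Firebase docs) is short: order by a field, fetch a page, then `start_after` the last document seen. A minimal sketch, assuming an initialized client `db`; the collection and field names mirror the snippet:

def paginate_collection(db, collection='recordings', page_size=1000):
    query = db.collection(collection).order_by('id').limit(page_size)
    while True:
        docs = list(query.stream())
        if not docs:
            break  # no documents left
        for doc in docs:
            yield doc.to_dict()
        # resume the next page after the last id seen
        query = (db.collection(collection)
                   .order_by('id')
                   .start_after({'id': docs[-1].to_dict()['id']})
                   .limit(page_size))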
def garmin_signup(request):
    # the firebase app may or may not be initialized; if not, we do it now.
    # we need to edit this code to check if the correct app (i.e. the one in
    # the config) is the one initialized -- there may be more than one app
    # (e.g. during migration to new app id)...
    if len(firebase_admin._apps) == 0:
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': config.PROJECT_ID,
        })
    db = firestore.client()

    # log some stuff (remove this code later as will have some "secrets" in it)
    logging.info(firebase_admin._apps)
    logging.info(utils.get_flask_request(request))

    # get & decode the idToken JWT to identify the user; verify not expired
    id_token = request.args.get('id_token', None)
    if id_token is None:
        return make_response('No id_token supplied', 401)
    try:
        decoded_token = auth.verify_id_token(id_token)
        uid = decoded_token['uid']
        logging.info('User id_token decoded: uid={} ({})'.format(
            uid, utils.obfuscate_secret(id_token)))
    except Exception as e:
        logging.error('User id_token decode error: {}: {} ({})'.format(
            type(e), e, utils.obfuscate_secret(id_token)))
        return make_response('User id_token decode error: {}'.format(e), 401)

    # as per the Garmin docs, we should ensure only one access token is active
    # at any one time. we do this in three ways:
    # 1. ensure a "force" is set when requesting a token (here) if a token
    #    already exists for a user (so they can't accidentally create a new one)
    # 2. removing a token when the access token is finally created on the callback
    # 3. sending a 503 access token revoked on the activity upload
    # Step 1. is needed rather than silently updating the token because a token
    # update will sync all historic activities
    user_ref = db.collection(config.USERS_COLLECTION).document(uid)
    user_doc = user_ref.get()
    if user_doc.exists:
        user = user_doc.to_dict()
        if user.get('access_token'):
            logging.info('Access token already exists for {} ({})'.format(
                uid, utils.obfuscate_secret(user['access_token'])))
            if not request.args.get('confirm_expire_existing_token', False):
                # do some other check here (e.g. ?force=1) or whatever;
                return make_response(
                    'An access token already exists for that user & '
                    'confirm_expire_existing_token not set', 403)
            # don't actually delete the token at this point...
        else:
            logging.info('User has never set up an access token: {}'.format(uid))
    else:
        logging.info('User document doesn\'t exist yet: {}'.format(uid))

    # make the request for an oauth token & secret to Garmin
    # create the client
    client = oauthlib.oauth1.Client(client_key=config.GARMIN_KEY,
                                    client_secret=config.GARMIN_SECRET,
                                    signature_type='BODY',
                                    callback_uri=config.GARMIN_CALLBACK_URL)
    # sign the request
    uri, headers, body = client.sign(
        config.GARMIN_REQUEST_TOKEN_URL,
        http_method='POST',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        body='')
    # submit the request to Garmin & handle an error
    response = requests.post(uri + '?' + body)
    if response.status_code != 200:
        logging.error('Garmin request token response invalid: {} {}'.format(
            response.status_code, response.reason))
        return make_response(
            'Garmin request token response invalid: {}'.format(response.reason),
            403)

    # extract the oauth token & secret
    qs = parse.parse_qs(response.text)
    oauth_token_request = qs['oauth_token'][0]
    oauth_token_secret_request = qs['oauth_token_secret'][0]

    # should never happen unless Garmin do something dumb....
    # we should remove this as it just costs one doc retrieval
    doc_ref = db.collection(
        config.GARMIN_REQUEST_TOKENS_COLLECTION).document(oauth_token_request)
    doc = doc_ref.get()
    if doc.exists:
        return make_response('That token has already been used', 403)

    # store token and corresponding uid -- we don't deactivate/delete any
    # other token associated with that uid yet (only on callback)
    doc_ref.set({
        'secret': oauth_token_secret_request,
        'used': False,
        'uid': uid,
        'created_ts': dt.utcnow()
    })

    # Redirect to Garmin for sign in & authorize app
    # e.g. https://connect.garmin.com/oauthConfirm?oauth_token=58166e54-ccd4-47cd-bd21-880b33c09b25
    logging.info(
        'SUCCESS. Got uid, Garmin request token & inserted into database. '
        'Redirecting. (uid={}, oauth_token={}, oauth_token_secret={})'.format(
            uid, utils.obfuscate_secret(oauth_token_request),
            utils.obfuscate_secret(oauth_token_secret_request)))
    return redirect(config.GARMIN_SIGN_IN_URL + '?oauth_token=' +
                    oauth_token_request)
def firebase_init(project_id):
    # Use the application default credentials
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {
        'projectId': project_id})
    return
def getRecipeTweet(event, context):
    # cred = credentials.Certificate("buzzrecipematome-firebase-adminsdk-h15r1-c07517e0f1.json")
    # firebase_admin.initialize_app(cred)
    # Use the application default credentials
    # cred = credentials.ApplicationDefault()
    # firebase_admin.initialize_app(cred, {
    #     'projectId': 'buzzrecipematome',
    # })
    if (not len(firebase_admin._apps)):
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': 'buzzrecipematome',
        })

    db = firestore.client()

    # consumer_key = settings.CONSUMER_KEY
    # consumer_secret = settings.CONSUMER_SECRET
    # access_token_key = settings.ACCESS_TOKEN_KEY
    # access_token_secret = settings.ACCESS_TOKEN_SECRET
    consumer_key = os.getenv('CONSUMER_KEY')
    consumer_secret = os.getenv('CONSUMER_SECRET')
    access_token_key = os.getenv('ACCESS_TOKEN_KEY')
    access_token_secret = os.getenv('ACCESS_TOKEN_SECRET')

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token_key, access_token_secret)
    api = tweepy.API(auth)

    statuses = api.list_timeline(slug='recipe', owner_screen_name='flowphantom')

    docs = db.collection('recipe').stream()
    exists_tweet_list = list(map(lambda doc: doc.to_dict()['tweetLink'], docs))

    # recipe keywords: frying pan, microwave, 500W, 600W
    recipe_words = ['フライパン', '電子レンジ', '500W', '600W']

    doc_ref = db.collection('recipe').document()
    for status in statuses:
        status = status._json
        # status = api.get_status(tweetID)._json
        try:
            if status['text'][0:2] == 'RT':
                continue
            url = ('https://twitter.com/' + status['user']['screen_name'] +
                   '/status/' + status['id_str'])
            # don't add recipes that are already stored
            if url in exists_tweet_list:
                continue
            # check whether the tweet contains a recipe keyword
            has_recie_word = any(
                list(
                    map(lambda recipe_word: recipe_word in status['text'],
                        recipe_words)))
            # skip tweets without any recipe keyword
            if not (has_recie_word):
                continue
            doc_ref.set({
                'tweetLink': url,
                'tweetText': status['text'],
            })
            print(url)
            print(status['text'])
            print('------ ')
            time.sleep(2)
        except IndexError:
            pass
def uploadDatabase(src):
    # Use the application default credentials
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {
        'projectId': 'bokx-fa37d',
    })
    db = firestore.client()

    with open(src, "r") as source:
        csvreader = csv.reader(source, delimiter=',')
        skip = 1
        next(csvreader)  # skip the header row
        count = 0
        while count < skip:
            count = count + 1
            next(csvreader)
            print('skipped line: ', count)

        for row in csvreader:
            shoeid = str(row[0])
            brand = str(row[1])
            series = str(row[2])
            model = str(row[3])
            version = str(row[4])
            name = str(row[5])
            style = str(row[6])
            colorway = str(row[7])
            retail_price = str(row[8])
            release_date = str(row[9])
            image_source = str(row[11])

            data = {
                'brand': brand,
                'series': series,
                'model': model,
                'version': version,
                'name': name,
                'style': style,
                'colorway': colorway,
                'retail_price': retail_price,
                'release_date': release_date,
                'image_source': image_source,
                'shoeid': shoeid
            }

            if brand == 'null':
                if ('adidas' in name) or ('Adidas' in name):
                    catalog_ref = db.collection('shoeCatalog').document('adidas') \
                        .collection('name').document(name)
                else:
                    catalog_ref = db.collection('shoeCatalog').document('null') \
                        .collection('name').document(name)
            else:
                # NOTE: with this branch commented out, catalog_ref is left
                # unassigned for rows whose brand is not 'null'.
                # if series == 'null':
                #     catalog_ref = db.collection('shoeCatalog').document(brand) \
                #         .collection('series').document('null') \
                #         .collection('name').document(name)
                # else:
                #     if model == 'null':
                #         catalog_ref = db.collection('shoeCatalog').document(brand) \
                #             .collection('series').document(series) \
                #             .collection('model').document('null') \
                #             .collection('name').document(name)
                #     else:
                #         if version == 'null':
                #             catalog_ref = db.collection('shoeCatalog').document(brand) \
                #                 .collection('series').document(series) \
                #                 .collection('model').document(model) \
                #                 .collection('version').document('null') \
                #                 .collection('name').document(name)
                #         else:
                #             catalog_ref = db.collection('shoeCatalog').document(brand) \
                #                 .collection('series').document(series) \
                #                 .collection('model').document(model) \
                #                 .collection('version').document(version) \
                #                 .collection('name').document(name)
                pass

            map_ref = db.collection('shoeMap').document(shoeid)
            catalog_ref.set(data)
            map_ref.set(data)
            count = count + 1
            print('uploaded shoe data: #', count, ' ', name)


uploadDatabase('../stockxData/result10.csv')
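Each row above costs two sequential `set()` round-trips. As a sketch of an alternative (the helper name and its arguments are mine, not from the source), Firestore write batches can group up to 500 writes per commit:

def upload_rows_batched(db, rows):
    # Firestore allows at most 500 writes in a single batch commit.
    batch = db.batch()
    pending = 0
    for data in rows:
        ref = db.collection('shoeMap').document(data['shoeid'])
        batch.set(ref, data)
        pending += 1
        if pending == 500:
            batch.commit()
            batch = db.batch()
            pending = 0
    if pending:
        batch.commit()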
def assess_iphone_models(save_path: str) -> None:
    """This function seeks to identify the distribution of iphone models
    across a random sample of Speak's userbase. A histogram will be created
    of the number of users on each iphone model.

    Args:
        save_path (str): path where the iphone count will be saved as pickle
    """
    PROJECT_ID = 'speak-v2-2a1f1'
    QUERY_LIMIT = 10000

    # verify and set the credentials
    CREDENTIAL_PATH = "/home/dzubke/awni_speech/speak-v2-2a1f1-d8fc553a3437.json"
    # CREDENTIAL_PATH = "/Users/dustin/CS/consulting/firstlayerai/phoneme_classification/src/awni_speech/speak-v2-2a1f1-d8fc553a3437.json"
    # set the environment variable that `firebase_admin.credentials` will use
    os.putenv("GOOGLE_APPLICATION_CREDENTIALS", CREDENTIAL_PATH)

    # initialize the credentials and firebase db client
    cred = credentials.ApplicationDefault()
    firebase_admin.initialize_app(cred, {'projectId': PROJECT_ID})
    db = firestore.client()

    rec_ref = db.collection(u'recordings')

    iphone_model_count = Counter()
    n_iphone_models = 100000
    while sum(iphone_model_count.values()) < n_iphone_models:
        print("inside while loop")
        next_query = rec_ref.order_by(u'id').limit(QUERY_LIMIT)
        for doc in next_query.stream():
            doc = doc.to_dict()
            # only select dates in 2020
            rec_date = doc.get('info', {}).get('date', None)
            if isinstance(rec_date, str):
                if rec_date.startswith('2020'):
                    # get the iphone model
                    iphone_model = doc.get('user', {}).get(
                        'deviceModelIdentifier', None)
                    if iphone_model is not None:
                        # iphone_model has the format 'iPad8,2', so split off
                        # the second half
                        iphone_model = iphone_model.split(',')[0]
                        iphone_model_count[iphone_model] += 1

    #iphone_model_count = dict(iphone_model_count)
    write_pickle(save_path, iphone_model_count)

    # plot the iphone model counts
    model_names, model_counts = list(zip(*iphone_model_count.most_common()))
    plt.plot(model_names, model_counts)
    plt.xticks(model_names, model_names, rotation=45)

    fig, ax = plt.subplots(constrained_layout=True)
    ax.bar(model_names, model_counts)
    plt.xticks(model_names, model_names, rotation=45)
    total = sum(model_counts)

    # plot the aggregate and percent of total values on both axes
    def _agg2percent_forward(x, total):
        return x / total

    def _agg2percent_backward(x, total):
        return x * total

    # create the forward and backward transforms for the axis
    forward_transform = partial(_agg2percent_forward, total=total)
    backward_transform = partial(_agg2percent_backward, total=total)

    # create the secondary axis
    secaxy = ax.secondary_yaxis('right',
                                functions=(forward_transform,
                                           backward_transform))

    # add the plot labels for each axis
    ax.set_ylabel("Device model count")
    secaxy.set_ylabel("Percent of total device count")
    plt.xlabel("Device names")
def start_process():
    start_time = time()
    storage_client = Client()
    scheduler_client = CloudSchedulerClient()
    scheduler_path = scheduler_client.location_path(config.PROJECT_ID,
                                                    config.REGION_ID)
    cred = credentials.ApplicationDefault()

    try:
        scheduler_client.delete_job(f"{scheduler_path}/jobs/{config.CRON_NAME}")
    except (GoogleAPICallError, PermissionDenied):
        logging.warning("course-collect manually triggered")
    try:
        scheduler_client.delete_job(f"{scheduler_path}/jobs/forcequit")
    except (GoogleAPICallError, PermissionDenied):
        logging.warning("forcequit job does not exist")

    if not _apps:
        initialize_app(cred, {"projectId": config.PROJECT_ID})
        logging.info("initializing firebase")
    firebase_db = firestore.client()

    if storage_client.bucket(config.BUCKET_NAME).exists():
        logging.info("reading from existing bucket")
        coursepickle_bucket = storage_client.bucket(config.BUCKET_NAME)
    else:
        logging.info("creating new bucket")
        coursepickle_bucket = storage_client.create_bucket(config.BUCKET_NAME)

    # Get unfinished course codes
    coursecode_blob = coursepickle_bucket.blob(config.COURSE_CODE_BLOB_NAME)
    try:
        coursecode_raw = coursecode_blob.download_as_string()
        unique_course_codes = pickle.loads(coursecode_raw)
    except NotFound:
        # Fetch course metadata per code for instructor, schedule, time,
        # location, GPA, grade distributions
        all_courses = get_all_courses(firebase_db)
        unique_course_codes = set(
            [course["code"] for course in all_courses.values()])

    # Get existing course metadata
    coursepickle_blob = coursepickle_bucket.blob(
        config.COURSE_METADATA_BLOB_NAME)
    try:
        course_metadata_raw = coursepickle_blob.download_as_string()
        course_metadata = pickle.loads(course_metadata_raw)
    except NotFound:
        course_metadata = {}
    course_metadata = course_metadata if course_metadata else {}

    # Conform to free tier limits (looks like {"runtime": 123, "datetime": datetime(...)})
    last_modified_blob = coursepickle_bucket.blob(config.LAST_MODIFIED_BLOB_NAME)
    try:
        last_modified_raw = last_modified_blob.download_as_string()
        last_modified = pickle.loads(last_modified_raw)
    except NotFound:
        last_modified = {}
    last_modified = last_modified if last_modified else {
        "runtime": 0,
        "datetime": None
    }
    check_free_tier_force_exit(
        scheduler_client, scheduler_path,
        get_curr_runtime(last_modified["runtime"], start_time))
    if last_modified["datetime"] and \
            last_modified["datetime"].day < datetime.now().day:
        last_modified["runtime"] = 0

    if bool(int(config.UPDATE_EXTRA_FIELDS)):
        course_code_done = []
        for code in unique_course_codes:
            try:
                logging.info(f"Checking class {code}")
                print(code)
                split_code = code.split()
                pg = requests_connectionerror_bypass(
                    config.SCHEDULE_TARGET_URL_FMT,
                    [config.LATEST_TERM, *split_code], scheduler_client,
                    scheduler_path, last_modified, start_time)
                html_content = requests_bandwith_bypass(
                    pg, config.SCHEDULE_TARGET_URL_FMT, split_code,
                    scheduler_client, scheduler_path, last_modified,
                    start_time)

                class_ddtitle = html_content.find_all(
                    "th", {"scope": "colgroup"}, class_="ddtitle")
                class_titles = [
                    th.a.text for th in class_ddtitle
                    if "table" in str(th.find_next("tr"))
                ]
                class_dddefaults = [
                    str(c).replace("\n", "")
                    for c in html_content.find_all("td", class_="dddefault")
                    if "cc.gatech.edu" in c.text or "students" in c.text
                    or "lecture" in c.text or "Semester" in c.text
                ]
                class_terms = [
                    re.search(
                        "(?<=Associated Term: </span>)([a-zA-Z0-9'\s]*)(?=<br)",
                        c).group(0).strip() for c in class_dddefaults
                ]
                class_registration_dates = [
                    re.search(
                        "(?<=Registration Dates: </span>)([a-zA-Z0-9,\s]*)(?=<br)",
                        c).group(0).strip() for c in class_dddefaults
                ]
                class_attributes = [
                    re.search("(?<=Attributes: </span>)([^<]*)(?=<br)",
                              c).group(0).strip()
                    if "Attributes" in c else None for c in class_dddefaults
                ]
                class_grade_bases = [
                    re.search("(?<=Grade Basis: </span>)([A-Z0-9\s]*)(?=<br)",
                              c).group(0).strip() for c in class_dddefaults
                ]
                class_table = html_content.find_all(
                    "table", class_="datadisplaytable")[1:-1]
                class_schedule_headers = [[
                    "_".join(header.text.lower().split())
                    for header in table.find_all("th")
                ] for table in class_table]
                class_schedule_data = [[
                    header.text.replace("(P)", "").strip()
                    for header in table.find_all("td")
                ] for table in class_table]
                for c in class_schedule_data:
                    c[-1] = " ".join(c[-1].split())
                instructor_emails = [
                    re.search(
                        "([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)",
                        str(c)).group(1) if "mailto" in str(c) else None
                    for c in class_table
                ]

                pg = requests_connectionerror_bypass(
                    config.CRITIQUE_TARGET_URL_FMT, split_code,
                    scheduler_client, scheduler_path, last_modified,
                    start_time)
                html_content = requests_bandwith_bypass(
                    pg, config.CRITIQUE_TARGET_URL_FMT, split_code,
                    scheduler_client, scheduler_path, last_modified,
                    start_time)
                critique_table = html_content.find("table", {"id": "dataTable"})
                critique_headers = [
                    "_".join(th.text.lower().split())
                    for th in critique_table.find_all("th")
                ][1:]
                critique_data_raw = [
                    td.text for td in critique_table.find_all("td")
                ]
                critique_data = [
                    critique_data_raw[x:x + len(critique_headers) + 1]
                    for x in range(0, len(critique_data_raw),
                                   len(critique_headers) + 1)
                ]
                critique_instructors = []
                for i in range(len(critique_data)):
                    critique_instructors.append(" ".join(
                        critique_data[i][0].split(", ")[::-1]))
                    del critique_data[i][0]
                    critique_data[i] = [critique_data[i][0]] + [
                        float(x) for x in critique_data[i][1:]
                    ]
                critique_averages = {}
                for i in range(len(critique_instructors)):
                    critique_averages[critique_instructors[i]] = dict(
                        zip(critique_headers, critique_data[i]))

                for i in range(len(class_titles)):
                    try:
                        schedule = dict(
                            zip(class_schedule_headers[i],
                                class_schedule_data[i]))
                    except Exception:
                        print(i)
                        raise RuntimeError
                    course_metadata[class_titles[i]] = {
                        "terms": class_terms[i],
                        "registration_dates": class_registration_dates[i],
                        "attributes": class_attributes[i],
                        "grade_basis": class_grade_bases[i],
                        "schedule": schedule,
                        "instructor_email": instructor_emails[i],
                        "averages": critique_averages[schedule["instructors"]]
                        if schedule["instructors"] in critique_averages
                        else None
                    }
                course_code_done.append(code)
            except RuntimeError as e:
                write_blobs_before_exit(coursepickle_blob, coursecode_blob,
                                        last_modified_blob, course_metadata,
                                        unique_course_codes, course_code_done,
                                        last_modified, start_time)
                schedule_next_try(scheduler_client, scheduler_path)
                raise e

    """ Fetch per course seat, credit, and requirement information """
    for i in range(config.START_IDX, config.END_IDX):
        try:
            logging.info(f"Checking class with id {i}")
            pg = requests_connectionerror_bypass(
                config.REGISTRATION_TARGET_URL_FMT, [config.LATEST_TERM, i],
                scheduler_client, scheduler_path, last_modified, start_time)
            html_content = requests_bandwith_bypass(
                pg, config.REGISTRATION_TARGET_URL_FMT, [i], scheduler_client,
                scheduler_path, last_modified, start_time)
            if "-" not in html_content.text:
                logging.info(f"skipping {i}")
                continue
            class_general = html_content.find_all("th", {"scope": "row"},
                                                  class_="ddlabel")[0].text
            # For classes with dashes in the class name, replace them one by
            # one with spaces
            # TODO retain dashes by using an alternative delimiter like " - "
            while len(re.findall("-", class_general)) != 3:
                class_general = re.sub("-", " ", class_general, 1)
            class_general_delimited = [
                s.strip() for s in class_general.split("-")
            ]
            class_name = class_general_delimited[0]
            class_id = int(class_general_delimited[1])
            class_code = class_general_delimited[2]
            class_dddefault = " ".join(
                html_content.find_all(
                    "td", class_="dddefault")[0].text.replace("\n", " ").split())
            class_credits = float(
                re.search("\d+\.\d+(?=\s+Credits)", class_dddefault).group(0))
            class_seats = [
                int(
                    re.search("Seats (-*\d+) (-*\d+) (-*\d+)",
                              class_dddefault).group(x)) for x in range(1, 4)
            ]
            class_waitlist_seats = [
                int(
                    re.search("Waitlist Seats (-*\d+) (-*\d+) (-*\d+)",
                              class_dddefault).group(x)) for x in range(1, 4)
            ]

            # Regex search method depends on prerequisites and restrictions combination
            if "Prerequisites" in class_dddefault:
                if "Restrictions" in class_dddefault:
                    class_prerequisites = re.search(
                        "Prerequisites: (.*)", class_dddefault).group(1)
                    class_restrictions = re.search(
                        "Restrictions: (.*) Prerequisites",
                        class_dddefault).group(1)
                else:
                    class_prerequisites = re.search(
                        "Prerequisites: (.*)", class_dddefault).group(1)
                    class_restrictions = None
            else:
                if "Restrictions" in class_dddefault:
                    class_prerequisites = None
                    class_restrictions = re.search(
                        "Restrictions: (.*)", class_dddefault).group(1)
                else:
                    class_prerequisites = None
                    class_restrictions = None

            course_dict = {
                "id": class_id,
                "code": class_code,
                "name": class_name,
                "credits": class_credits,
                "seats": {
                    "capacity": class_seats[0],
                    "actual": class_seats[1],
                    "remaining": class_seats[2]
                },
                "waitlist": {
                    "capacity": class_waitlist_seats[0],
                    "actual": class_waitlist_seats[1],
                    "remaining": class_waitlist_seats[2]
                },
                "restrictions": class_restrictions,
                "prerequisites": class_prerequisites,
                "last_updated": datetime.now()
            }
            if class_general in course_metadata:
                course_dict.update(course_metadata[class_general])

            # Send all collected class metadata
            firebase_db.collection(u'{}'.format(
                config.PRIMARY_TABLE_NAME)).document(
                    u'{}'.format(class_id)).set(course_dict)
            all_table_name = f"{config.SECONDARY_TABLE_NAME}{i // 500}"
            all_courses_doc = firebase_db.collection(
                u'{}'.format(all_table_name)).document(
                    u'{}'.format("all_courses")).get()
            if all_courses_doc.exists:
                all_courses = all_courses_doc.to_dict()
                all_courses[str(class_id)] = course_dict
                firebase_db.collection(u'{}'.format(all_table_name)).document(
                    u'{}'.format("all_courses")).set(all_courses)
            else:
                firebase_db.collection(u'{}'.format(all_table_name)).document(
                    u'{}'.format("all_courses")).set(
                        {str(class_id): course_dict})
        except RuntimeError as e:
            write_blobs_before_exit(coursepickle_blob, coursecode_blob,
                                    last_modified_blob, course_metadata, [],
                                    [], last_modified, start_time)
            schedule_next_try(scheduler_client, scheduler_path)
            raise e

    # Delete all blobs
    coursepickle_blob.delete()
    coursecode_blob.delete()
    last_modified_blob.delete()
    schedule_next_try(scheduler_client, scheduler_path,
                      adjust_cron=timedelta(days=1))
    return "200 OK"
def test_nonexisting_path(self, app_default):
    del app_default
    with pytest.raises(IOError):
        credentials.ApplicationDefault()
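Both `test_nonexisting_path` variants depend on an `app_default` fixture that isn't shown in this excerpt. A plausible minimal sketch (an assumption; the real fixture may differ) is one that points the ADC environment variable at a path that does not exist:

import pytest


@pytest.fixture
def app_default(monkeypatch):
    # With GOOGLE_APPLICATION_CREDENTIALS pointing at a missing file,
    # credentials.ApplicationDefault() fails with IOError in the tests above.
    monkeypatch.setenv('GOOGLE_APPLICATION_CREDENTIALS', 'non_existing.json')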
            'tr:nth-child(2) > td:nth-child(2)::text').get()
        scores['conference'] = table.css(
            'tr:nth-child(3) > td:nth-child(2)::text').get()
        scores['streak'] = table.css(
            'tr:nth-child(4) > td:nth-child(2)::text').get()
        scores['home'] = table.css(
            'tr:nth-child(5) > td:nth-child(2)::text').get()
        scores['away'] = table.css(
            'tr:nth-child(6) > td:nth-child(2)::text').get()
        scores['neutral'] = table.css(
            'tr:nth-child(7) > td:nth-child(2)::text').get()
        yield scores


oApp = initialize_app(credentials.ApplicationDefault(), name='hodp-scraping')


class TeamsCrawlerRunner(CrawlerRunner):
    """
    Crawler object that collects items and returns output after finishing crawl.
    """

    def crawl(self, crawler_or_spidercls, *args, **kwargs):
        self.settings = None
        self.COLLECTION_NAME = 'sports-scores'
        self.store = firestore.client(oApp)
        self.collection_ref = self.store.collection(self.COLLECTION_NAME)
        self.batch = self.store.batch()

        # create crawler (Same as in base CrawlerProcess)
        crawler = self.create_crawler(crawler_or_spidercls)
import logging

from bs4 import BeautifulSoup
import re
import urllib
import firebase_admin
from firebase_admin import credentials, firestore

# configure local or cloud
try:
    from config import PROJECT  # only cloud
except ImportError:
    PROJECT = 'torbjorn-zetterlund'  # only local

logging.basicConfig(filename='test.log', level=logging.INFO)  # log only local

# initialize firebase sdk
CREDENTIALS = credentials.ApplicationDefault()
firebase_admin.initialize_app(CREDENTIALS, {
    'projectId': PROJECT,
})

# get firestore client
db = firestore.client()

keywords = []


def main(request):
    # Fake Real Browser
    headers = {'User-agent': 'Mozilla/11.0'}
    searchquery_ref = db.collection(u'searchquery')
""" 태그를 정리한 txt파일을 이용하여, 파이어베이스의 stlye collection내의 옷 사진들의 태그를 업데이트합니다. """ import firebase_admin from firebase_admin import firestore from firebase_admin import credentials cred = credentials.ApplicationDefault() # auth 파일을 엽니다. firebase_admin.initialize_app(cred, { 'projectId': 'fashionistagram-66015', }) # 저희의 프로젝트 이름입니다. db = firestore.client() # db client입니다. image_feature = dict() # 태그를 저장할 dict 객체입니다. with open('태그0010~1050.txt', 'r') as f: lines = f.readlines()[2:] # 한글 속성 줄 제거 for line in lines: line = line[:-1] # \n 제거. line = line.split('\t') num = int(line[0]) tag = ('').join(line[1:]) image_feature[num] = tag print(image_feature.keys()) # 태그 파일을 잘 읽어왔는지 확인합니다. for image_num in image_feature.keys(): if image_num < 1000: image_num = '00' + str(image_num)
def sl_predict(event, context):
    # download image to predict
    download_image(event, context)

    # stop when wrong file uploaded
    if '.jpg' not in file_nama:
        return 0

    # deploy model locally
    global model
    if not model:
        download_model_file()
        model = tf.keras.models.load_model('/tmp/model.h5')

    # initialize firestore
    global db
    if not db:
        # Use the application default credentials
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': 'white-device-312612',
        })
        db = firestore.client()

    # image process
    img = image.load_img(image_path, grayscale=True, target_size=(28, 28))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)

    # predict
    images = np.vstack([x])
    classes = model.predict(images, batch_size=10)
    index_max = np.argmax(classes)
    global value
    # class indices 0-25 map straight onto the letters A-Z
    value = chr(ord('A') + int(index_max))

    # update firestore
    doc_ref = db.collection(u'sign-language').document(datee[-2])
    doc_ref.set({file_nama.strip('.jpg'): value}, merge=True)
    print(file_nama + '\n')
    print(datee[-2] + '\n')
    print(value + '\n')
def _create_firebase_app():
    global firebase_app
    cred = credentials.ApplicationDefault()
    firebase_app = firebase_admin.initialize_app(cred, {
        'projectId': PROJECT_ID,
    })
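# Hedged usage sketch (not in the source): call the helper above only when
# no app exists yet, so warm invocations don't raise "app already exists".
if not firebase_admin._apps:
    _create_firebase_app()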
def new_games_to_db(request):
    p = argparse.ArgumentParser()
    p.add_argument('-d', '--day', type=int, default=None, required=False)
    p.add_argument('-y', '--year', type=int, default=None, required=False)
    p.add_argument('-l', '--limit', type=int, default=float('inf'),
                   required=False)
    p.add_argument('-k', '--keep_going', default=False, action='store_true')
    p.add_argument('-n', '--dry_run', default=False, action='store_true')
    p.add_argument('--nodry_run', dest='dry_run', action='store_false')
    r = p.parse_args()
    day = r.day
    year = r.year
    limit = r.limit
    keep_going = r.keep_going
    dry_run = r.dry_run

    if not dry_run:
        # Use the application default credentials
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': 'pennantchase-256',
        })
        db = firestore.client()
        mydb = db.collection(u'mydb')

    if not day:
        r = requests.get(
            'https://www.pennantchase.com/lgScoreboard.aspx?lgid=%s'
            % LEAGUE_ID)
        soup = bs4.BeautifulSoup(r.content, 'html.parser')
        select = soup.find_all(
            lambda tag: tag.has_attr('id') and
            tag['id'] == 'ContentPlaceHolder1_ddDays')[0]
        day = int(
            select.find_all(
                lambda tag: tag.has_attr('selected') and
                tag['selected'] == 'selected')[0].getText())
        print('Starting from day %d' % day, file=sys.stdout)

    if not year:
        # hope that if we're in playoffs we get the year right!
        r = requests.get(
            'https://www.pennantchase.com/lgSchedule.aspx?lgid=%s'
            % LEAGUE_ID)
        playoffs = r.content.decode().find('Playoff') >= 0
        r = requests.get(PAST_STANDINGS_URL)
        r.raise_for_status()
        last_year_str = re.sub(
            r"^.*Last Year's Standings: ([0-9]+)[^0-9].*$", r'\1',
            r.content.decode(), flags=re.DOTALL)
        if not re.match('^[0-9]+$', last_year_str):
            raise Exception("Couldn't determine year")
        last_year = int(last_year_str)
        year = last_year + (0 if playoffs else 1)
        print('Year = %d' % year, file=sys.stdout)

    fully_processed_count = 0
    considered = 0
    # for each day
    while day >= 1 and considered < limit:
        considered += 1
        print('considering day %d' % day, file=sys.stdout)
        day_url = ('https://www.pennantchase.com/lgScoreboard.aspx'
                   '?lgid=256&scoreday=%d' % day)
        r = requests.get(day_url)
        r.raise_for_status()
        soup = bs4.BeautifulSoup(r.content, 'html.parser')
        score_tables = soup.find_all(
            lambda tag: tag.get('class', '') == ['scoreTable', 'table'])
        score_count = len(score_tables)
        if score_count == 0:
            time.sleep(5)
        upload_count = 0
        # for each game
        for score_table in score_tables:
            rows = score_table.find_all('tr')
            header = [c.text for c in rows[0]]
            if header != ['Final', '1', '2', '3', '4', '5', '6', '7', '8',
                          '9', 'x', 'R', 'H', 'E']:
                raise Exception('Bad header %s' % header)
            away_home_raw = rows[1:3]
            away_home_ids = []
            away_home_runs = []
            for line_raw in away_home_raw:
                # not very beautiful use of BeautifulSoup here:
                line_elts = list(line_raw)
                team_id = re.match(
                    r'.*tid=([^&]+)',
                    line_elts[0].find_all('a')[0]['href'])[1]
                away_home_ids.append(team_id)
                away_home_runs.append(int(line_elts[11].text))
            box_score_url = ('https://www.pennantchase.com/' +
                             rows[-1].find_all('a')[0]['href'])
            game_id = re.match(r'.*sid=([^&]+)', box_score_url)[1]
            # and since we have them handy:
            #   home_runs
            #   away_runs
            # TODO really
            document = {
                u'year': year,
                u'day': day,
                u'away': away_home_ids[0],
                u'home': away_home_ids[1],
                u'away_r': away_home_runs[0],
                u'home_r': away_home_runs[1],
            }
            if not dry_run:
                wrote = False
                transaction = db.transaction()
                ref = mydb.document(game_id)
                try:
                    write_new_document(transaction, ref, document)
                    wrote = True
                    upload_count += 1
                    print('wrote', game_id)
                except exceptions.AlreadyExists:
                    print(game_id, 'already exists')
                    # Check if the document is in the db. This is here in
                    # case game_id turns out not to be unique.
                    db_dict = ref.get().to_dict()
                    if document != db_dict:
                        raise ValueError('%s: %s %s but db contains %s' % (
                            game_id,
                            ('Wrote' if wrote else 'Tried to write'),
                            document, db_dict))
            else:
                print('Dry run, would have tried writing %s: %s' %
                      (game_id, document))
        if score_count > 0 and upload_count == 0:
            print('already processed day %d' % day, file=sys.stdout)
            fully_processed_count += 1
            if fully_processed_count > 1 and not keep_going:
                print('already processed %d days, stopping' %
                      fully_processed_count)
                break
        day -= 1
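# A hedged sketch, not from the source, of the write_new_document helper the
# loop above calls: Transaction.create() fails with AlreadyExists when the
# document is already present, which is exactly what the caller catches.
@firestore.transactional
def write_new_document(transaction, ref, document):
    transaction.create(ref, document)  # raises AlreadyExists on duplicates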
from datetime import datetime

import simplejson as json
import psaw
import requests
from flask import Flask
from flask import request, jsonify, abort
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from google.cloud import pubsub_v1

app = Flask(__name__)
firebase_admin.initialize_app(credentials.ApplicationDefault())
reddit = psaw.PushshiftAPI()
db = firestore.client()
session = requests.Session()
publisher = pubsub_v1.PublisherClient()


@app.route('/r/<subreddit>')
def run(subreddit):
    before = request.args.get('before', type=int)
    submissions = reddit.search_submissions(
        before=before,
        subreddit=subreddit,
        limit=500,
        filter=[
import glob
import os
from datetime import datetime

import firebase_admin
from firebase_admin import credentials
from google.cloud import firestore

# load the 1-wire kernel modules for the DS18B20 temperature sensor
os.system('modprobe w1-gpio')
os.system('modprobe w1-therm')

base_dir = '/sys/bus/w1/devices/'
device_folder = glob.glob(base_dir + '28*')[0]
device_file = device_folder + '/w1_slave'

project_id = "smart-water-168ca"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
    "/home/pi/Desktop/smart-water-168ca-d9f996738e27.json"
cred = credentials.ApplicationDefault()
firebase_admin.initialize_app(cred, {
    'projectId': project_id,
})
db = firestore.Client()


def generateID():
    data = datetime.now()
    sensorBase_ref = db.collection(u'sensorBase').document()
    sensorBase_ref.set({
        "registration": data,
        "location": ""
    })
    return sensorBase_ref.id


def getSensorBase_id():
def garmin_callback(request):
    # The firebase app may or may not be initialized; if not, we do it now.
    # We need to edit this code to check that the correct app (i.e. the one
    # in the config) is the one initialized; there may be more than one app
    # (e.g. during migration to a new app id). A hedged sketch of such a
    # check follows this function.
    if len(firebase_admin._apps) == 0:
        cred = credentials.ApplicationDefault()
        firebase_admin.initialize_app(cred, {
            'projectId': config.PROJECT_ID,
        })
    db = firestore.client()

    # log some stuff (remove this code later, as it will have some
    # "secrets" in it)
    logging.info(firebase_admin._apps)
    logging.info(utils.get_flask_request(request))

    # Extract the required fields from the request
    oauth_token_request = ''
    oauth_verifier = ''
    try:
        oauth_token_request = request.args['oauth_token']
        oauth_verifier = request.args['oauth_verifier']
    except KeyError:
        logging.info(
            'Required parameters not returned: oauth_token={}, '
            'oauth_verifier={}'
            .format(utils.obfuscate_secret(oauth_token_request),
                    utils.obfuscate_secret(oauth_verifier)))
        return make_response(
            'Required call back parameters (oauth_token and oauth_verifier) '
            'not sent -- user denied access', 401)

    # Garmin seems to return null in the verifier if not authorized
    if oauth_verifier == 'null':
        logging.info(
            'Authorization denied for Garmin. oauth_token={}, '
            'oauth_verifier={}'
            .format(utils.obfuscate_secret(oauth_token_request),
                    utils.obfuscate_secret(oauth_verifier)))
        return make_response('App not authorized in Garmin', 401)

    # Find the corresponding previously saved oauth token secret in Firestore
    request_token_ref = db.collection(
        config.GARMIN_REQUEST_TOKENS_COLLECTION).document(oauth_token_request)
    request_token_doc = request_token_ref.get()
    if not request_token_doc.exists:
        return make_response(
            "Garmin request token doesn't exist in the database; can't map "
            "user (oauth_token_request={})"
            .format(utils.obfuscate_secret(oauth_token_request)), 401)
    request_token = request_token_doc.to_dict()
    logging.info(
        'Found user from Garmin request token: uid={} '
        '(oauth_token_request={})'
        .format(request_token['uid'],
                utils.obfuscate_secret(oauth_token_request)))
    oauth_token_secret_request = request_token['secret']

    # Complete auth by sending back the verifier, token & previously saved
    # secret to exchange for an access token
    client = oauthlib.oauth1.Client(
        client_key=config.GARMIN_KEY,
        client_secret=config.GARMIN_SECRET,
        signature_type='BODY',
        resource_owner_key=oauth_token_request,
        verifier=oauth_verifier,
        resource_owner_secret=oauth_token_secret_request)

    # sign it
    uri, headers, body = client.sign(
        config.GARMIN_ACCESS_TOKEN_URL,
        http_method='POST',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        body='')

    # submit request
    response = requests.post(uri + '?' + body)
    if response.status_code != 200:
        logging.error(
            'Garmin access token request failure {}: {} ({})'.format(
                response.status_code, response.reason,
                utils.obfuscate_secret(oauth_token_request)))
        return make_response('Garmin access token request failure', 401)

    # extract the access token
    qs = parse.parse_qs(response.text)
    oauth_token_access = qs['oauth_token'][0]
    oauth_token_secret_access = qs['oauth_token_secret'][0]
    logging.info(
        'Access token received: uid={}, access_token={}, '
        'access_token_secret={}'
        .format(request_token['uid'],
                utils.obfuscate_secret(oauth_token_access),
                utils.obfuscate_secret(oauth_token_secret_access)))

    # In a Firestore transaction, do the following:
    # 1. invalidate the request token
    # 2. invalidate any old access token(s) for the user
    # 3. set the new access token
    @firestore.transactional
    def update_in_transaction(transaction, request_token_ref, user_ref,
                              oauth_token_access, oauth_token_secret_access):
        # get the user
        user_snapshot = user_ref.get(transaction=transaction)
        user_current_token = user_snapshot.get('access_token')

        # invalidate request token
        transaction.update(request_token_ref, {'used': True})
        logging.info('Set request token {} as used for {}'.format(
            utils.obfuscate_secret(request_token_ref.id), user_ref.id))

        # add new token
        new_access_token_ref = db.collection(
            config.GARMIN_ACCESS_TOKENS_COLLECTION).document(
                oauth_token_access)
        transaction.set(
            new_access_token_ref, {
                'uid': user_ref.id,
                'secret': oauth_token_secret_access,
                'active': True,
                'usages': 0,
                'created_ts': dt.utcnow()
            })
        logging.info('Added new access token {} for {}'.format(
            utils.obfuscate_secret(new_access_token_ref.id), user_ref.id))

        # create reference to new token
        transaction.set(user_ref, {'access_token': oauth_token_access},
                        merge=True)
        logging.info('Set user token {} for {}'.format(
            utils.obfuscate_secret(new_access_token_ref.id), user_ref.id))

        # invalidate old token
        if user_current_token:
            existing_token_ref = db.collection(
                config.GARMIN_ACCESS_TOKENS_COLLECTION).document(
                    user_current_token)
            transaction.update(existing_token_ref, {'active': False})
            logging.info('Invalidated old token {} for {}'.format(
                utils.obfuscate_secret(existing_token_ref.id), user_ref.id))

    transaction = db.transaction()
    user_ref = db.collection(config.USERS_COLLECTION).document(
        request_token['uid'])
    update_in_transaction(transaction, request_token_ref, user_ref,
                          oauth_token_access, oauth_token_secret_access)
    return redirect(config.HOME_PAGE)
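# A hedged sketch, not from the source, addressing the TODO at the top of
# garmin_callback: verify that the default app actually targets
# config.PROJECT_ID instead of only checking that some app exists.
def _ensure_firebase_app():
    try:
        app = firebase_admin.get_app()  # raises ValueError if none exists
    except ValueError:
        app = firebase_admin.initialize_app(
            credentials.ApplicationDefault(), {
                'projectId': config.PROJECT_ID,
            })
    if app.project_id != config.PROJECT_ID:
        raise RuntimeError(
            'Unexpected Firebase app project: %s' % app.project_id)
    return app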