def get_unsplash_photos(food, page=1):
    term = food.name
    key = f'{term}||{page}'
    response = UNSPLASH_CACHE.get_item(key)
    if not response:
        query_params = f'?page={page}&query={term}&client_id={UPSPLASH_APIKEY}'
        response = requests.get(f'{BASE}{query_params}').json()
        UNSPLASH_CACHE.cache_item(key, response)
    return py_.map(
        response.get('results'),
        lambda photo, i: UnsplashPhoto.objects.get_or_create(
            food=food,
            search_term=term,
            order=i * page,
            total=response.get('total'),
            width=photo.get('width'),
            height=photo.get('height'),
            color=photo.get('color'),
            blur_hash=photo.get('blur_hash'),
            description=photo.get('description'),
            alt_description=photo.get('alt_description'),
            raw=py_.get(photo, 'urls.raw'),
            full=py_.get(photo, 'urls.full'),
            small=py_.get(photo, 'urls.small'),
            thumb=py_.get(photo, 'urls.thumb'),
            regular=py_.get(photo, 'urls.regular'),
            unsplash_page=py_.get(photo, 'links.html'),
            username=py_.get(photo, 'user.username'),
            ancestryCategory=py_.get(photo, 'tags[0].source.ancestry.category.slug'),
            ancestrySubcategory=py_.get(photo, 'tags[0].source.ancestry.subcategory.slug'),
        )[0])
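# Hedged aside, not part of the original helpers: the photo mappers in this module rely on
# pydash passing the item's index to two-argument callbacks, which is what feeds the
# `order` field. A minimal, self-contained illustration of that behaviour:
from pydash import py_

example_photos = [{'id': 'a1'}, {'id': 'b2'}]
example_ordered = py_.map(example_photos,
                          lambda photo, i: {'photo_id': photo['id'], 'order': i})
# example_ordered == [{'photo_id': 'a1', 'order': 0}, {'photo_id': 'b2', 'order': 1}]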
def get_bolded_names(soup):
    # other names for the food are bolded in the first paragraph
    paragraphs = soup.select('.mw-parser-output p:not(.mw-empty-elt)')
    firstParagraph = paragraphs[0]
    aliasIndicators = ['known as', 'common name']
    if py_.some(aliasIndicators, lambda l: l in firstParagraph.text.lower()):
        return filter(
            None,
            py_.uniq(py_.map(firstParagraph.select('b'), lambda b: b.text)))
def stripSearch(convo):
    return {
        'color': getColor(convo['last_message']['created_at']),
        'tags': py_.map(convo['tags'], getName),
        'timeElapsed': timeElapsed(convo['last_message']['created_at']),
        'timeElapsedMins': timeElapsed(convo['last_message']['created_at']) / 60,
        'conversation_link': "https://app.frontapp.com/open/" + convo['id']
    }
def check_has_subunit(alist, line_name, unit):
    ret = []
    children = _.result(_.find(alist, {'unit': unit}), 'children')
    ids = _.pluck(children, 'id')
    ids = _.map(ids, lambda x: 'unitsub_' + x)
    for id in ids:
        p = get_occur_p(line_name, id)
        if p > 0:
            ret.append(id)
    return ret
def get_simulators(name_filter='iPhone', os_filter='iOS'):
    output = xcrun.simctl('list', '-j')
    json_output = json.loads(output.stdout.decode('ascii'))
    devices = json_output['devices']
    device_list = []
    # tag each simulator with the runtime (os) it belongs to
    for os_name, simulators in devices.items():
        device_list.extend(
            py_.map(simulators, lambda element: py_.extend({}, element, {'os': os_name})))
    device_list = py_.filter(device_list, lambda x: os_filter in x['os'])
    device_list = py_.filter(device_list, lambda x: name_filter in x['name'])
    device_list = py_.filter(device_list, {'availability': '(available)'})
    return device_list
def extractFeatures(indices):
    """Extract features.

    Args:
        indices: word indices

    Returns:
        feature: email feature vector
    """
    feature = py_.map(range(1, len(vocabList) + 1),
                      lambda index: py_.index_of(indices, index) > -1)
    return np.array(feature, dtype=np.uint)
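# Self-contained sketch of the membership-vector pattern used by extractFeatures, with a
# small local vocabulary size standing in for the module-level `vocabList` defined elsewhere.
import numpy as np
from pydash import py_

example_vocab_size = 5
example_indices = [2, 5]
example_feature = np.array(
    py_.map(range(1, example_vocab_size + 1),
            lambda index: py_.index_of(example_indices, index) > -1),
    dtype=np.uint8)
# example_feature.tolist() == [0, 1, 0, 0, 1]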
def get_pixabay_photos(food, page=1):
    term = food.name
    key = f'{term}||{page}'
    response = PEXELS_CACHE.get_item(key)
    if not response:
        query_params = dictToQuery({
            'q': term,
            'page': page,
            'per_page': 30,
            'image_type': 'photo',
            'key': PIXABAY_APIKEY
        })
        # query_params = f'?page={page}&per_page=30&q={term}&image_type=photo&key={PIXABAY_APIKEY}'
        response = requests.get(f'{BASE}?{query_params}').json()
        PEXELS_CACHE.cache_item(key, response)
    return py_.map(
        response.get('hits'),
        lambda photo, i: PixabayPhoto.objects.get_or_create(
            food=food,
            search_term=term,
            # engineered features for ML
            order=i,
            total=response.get('total'),
            pageURL=photo.get('pageURL'),
            pixabay_id=photo.get('id'),
            tags=photo.get('tags'),
            previewWidth=photo.get('previewWidth'),
            previewHeight=photo.get('previewHeight'),
            webformatWidth=photo.get('webformatWidth'),
            webformatHeight=photo.get('webformatHeight'),
            largeImageURL=photo.get('largeImageURL'),
            imageURL=photo.get('imageURL'),
            imageWidth=photo.get('imageWidth'),
            imageHeight=photo.get('imageHeight'),
            imageSize=photo.get('imageSize'),
            views=photo.get('views'),
            downloads=photo.get('downloads'),
            favorites=photo.get('favorites'),
            likes=photo.get('likes'),
            comments=photo.get('comments'),
            user_id=photo.get('user_id'),
            user=photo.get('user'),
            previewURL=photo.get('previewURL'),
            # webformatURL=photo.get('webformatURL'),
            # fullHDURL=photo.get('fullHDURL'),
        )[0])
def find_sftp_server(client, server_name):
    # Finding a server by name is a little more complicated than I originally expected.
    # Rather than wasting resources it's much easier to just go find it and then check
    # if the return value of this method is None.

    # Load all of the server IDs in the account
    all_server_ids = py_.map(client.list_servers()['Servers'], 'ServerId')
    all_servers = py_.map_(
        all_server_ids,
        lambda server_id: client.describe_server(ServerId=server_id))
    host = py_.find(
        all_servers,
        {'Server': {
            'Tags': [{
                'Key': SERVER_NAME_KEY,
                'Value': server_name
            }]
        }})
    return host
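# Illustrative note: the dict passed to py_.find above is pydash's "matches" shorthand,
# returning the first item whose nested structure matches the given sub-dict. The tag key
# below ('Name') is only an example; the real module uses the SERVER_NAME_KEY constant.
from pydash import py_

example_servers = [
    {'Server': {'ServerId': 's-1', 'Tags': [{'Key': 'Name', 'Value': 'alpha'}]}},
    {'Server': {'ServerId': 's-2', 'Tags': [{'Key': 'Name', 'Value': 'beta'}]}},
]
example_match = py_.find(example_servers,
                         {'Server': {'Tags': [{'Key': 'Name', 'Value': 'beta'}]}})
# example_match['Server']['ServerId'] == 's-2'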
def top_recommendation(self):
    AITO_INSTANCE_URL = 'https://junction-test.aito.app'
    AITO_API_KEY = '4yaBPf9Kmk9xHNW30jBop7ieEmWMz2eSpmKyWvBi'
    client = AitoClient(instance_url=AITO_INSTANCE_URL, api_key=AITO_API_KEY)
    queries = self.request.query_params
    query_type = queries.get("type", "Mexican")
    limit = int(queries.get("limit", 0))
    user_id = queries.get("userID")
    wh = {
        "placeID.cuisine": query_type,
    }
    if user_id:
        like_places = list(
            map(
                lambda t: {"placeID": t},
                UserLike.objects.exclude(user_id=user_id).values_list(
                    "place__aito_id", flat=True)))
        if like_places:
            query = {
                "from": "ratings",
                "where": {
                    "$and": like_places
                },
            }
            res = aito_api.generic_query(client=client, query=query)
            user_ids = py_.map(res["hits"], "userID")
            wh["userID"] = {"$and": user_ids}
    rec_query = {
        "from": "ratings",
        "where": wh,
        "recommend": "placeID",
        "goal": {
            "rating": 2
        },
    }
    if limit:
        rec_query['limit'] = int(limit)
    res = aito_api.recommend(client=client, query=rec_query)
    return res.json['hits']
def get_pexels_photos(food, page=1):
    term = food.name
    key = f'{term}||{page}'
    response = PEXELS_CACHE.get_item(key)
    if not response:
        query_params = f'?page={page}&per_page=30&query={term}'
        response = requests.get(f'{BASE}{query_params}',
                                headers={"Authorization": PEXELS_APIKEY}).json()
        PEXELS_CACHE.cache_item(key, response)
    return py_.map(
        response.get('photos'),
        lambda photo, i: PexelsPhoto.objects.get_or_create(
            food=food,
            search_term=term,
            # engineered features for ML
            order=response.get('page') * i,
            total=response.get('total_results'),
            pexels_id=photo.get('id'),
            width=photo.get('width'),
            height=photo.get('height'),
            url=photo.get('url'),
            photographer=photo.get('photographer'),
            photographer_url=photo.get('photographer_url'),
            photographer_id=photo.get('photographer_id'),
            avg_color=photo.get('avg_color'),
            original=py_.get(photo, 'src.original'),
            large2x=py_.get(photo, 'src.large2x'),
            small=py_.get(photo, 'src.small'),
            tiny=py_.get(photo, 'src.tiny'),
            # large=py_.get(photo, 'src.large'),
            # medium=py_.get(photo, 'src.medium'),
            # portrait=py_.get(photo, 'src.portrait'),
            # landscape=py_.get(photo, 'src.landscape'),
        )[0])
def get_engagement_priority_from_record(
        identity: Identity, record: Record) -> Union[None, List[Dict]]:
    """Retrieve the Engagement Priority metadata associated with a record.

    Args:
        identity (flask_principal.Identity): User identity

        record (invenio_records.Record): Record API Object from where the
            engagement priorities must be extracted.

    Returns:
        Union[None, List[Dict]]: None or the engagement priorities metadata (as dict).
    """
    # getting the engagement priority topics
    result = None

    record_engagement_priorities = py_.get(record, "metadata.engagement_priorities", [])
    record_engagement_priorities_ids = py_.map(record_engagement_priorities,
                                               lambda x: x["id"])

    if record_engagement_priorities_ids:
        record_engagement_priorities = vocabulary_service.read_many(
            identity=identity,
            type="engagementprioritiestypes",
            ids=record_engagement_priorities_ids,
        ).to_dict()

        result = (
            py_.chain(record_engagement_priorities)
            .get("hits.hits", [])
            .map(
                lambda x: py_.set_(
                    x,
                    "props.icon",
                    url_for("static", filename=py_.get(x, "props.icon")),
                )
                if py_.get(x, "props.icon") != ""
                else x,
            )
        ).value()
    return result
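# Minimal, self-contained sketch (illustrative data only) of the py_.chain pipeline used
# above: read a nested list with a default, rewrite a deep property on each item with
# py_.set_, and unwrap the result with .value(). The "/static/" prefix stands in for the
# Flask url_for('static', ...) call, which needs an application context.
from pydash import py_

example_response = {"hits": {"hits": [
    {"props": {"icon": "icons/priority-a.svg"}},
    {"props": {"icon": ""}},
]}}
example_result = (
    py_.chain(example_response)
    .get("hits.hits", [])
    .map(lambda x: py_.set_(x, "props.icon", "/static/" + py_.get(x, "props.icon"))
         if py_.get(x, "props.icon") != "" else x)
).value()
# example_result[0]["props"]["icon"] == "/static/icons/priority-a.svg"; the empty icon is untouched.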
def unprocessed_mpids(sources, targets):
    xas = sources[0]
    xas_averaged = targets[0]
    mpids_marked_invalid = set(invalid_pks(xas_averaged, 'mp_id'))
    mpids_source_updated = set(
        updated_pks(xas, targets, 'mp_id', dt_map=lambda dt: dt.isoformat()))
    mpids_build_incomplete = set()
    for mp_id in tqdm(xas.collection.distinct('mp_id')):
        doc = xas.collection.find_one({'mp_id': mp_id}, ['structure'])
        structure = Structure.from_dict(doc['structure'])
        elements = set(
            py_.map(
                py_.flatten(site.species_and_occu.elements
                            for site in structure.sites), str))
        n_processed = xas_averaged.collection.find({
            'mp_id': mp_id,
            'element': {'$in': list(elements)}
        }).count()
        if n_processed != len(elements):
            mpids_build_incomplete.add(mp_id)
    return mpids_source_updated | (mpids_build_incomplete - mpids_marked_invalid)
def run(self, N, seeds):
    new_seeds = []
    # Extract all the tweets
    for s in seeds:
        print("Starting seed: " + s["handle"])
        tweets_seed = self.crawler.get_users_tweets(s["handle"], N)
        if len(tweets_seed) == 0:
            self.db_manager.delete_element(
                "seeds", {"handle": s["handle"], "id_experiment": self.id_experiment})
            continue
        # else:
        #     logging.info(s + " Tweets' number: " + str(len(tweets_seed)))
        for item in tweets_seed:
            item.update({"id_experiment": self.id_experiment, "seed": s["_id"]})
        self.db_manager.write_mongo("tweets", tweets_seed)
        handels_new = set(self.crawler.get_all_handles_mentioned(tweets_seed, s["handle"]))
        # print(s + " Handles mentioned: " + " ".join(handels_new))
        handles = []
        for h in handels_new:
            h_dict = {"handle": h, "origin": s["handle"]}
            new_seeds.append(h_dict)
        # if len(handles) != 0:
        #     self.db_manager.write_mongo("seeds", [{"handle": h, "starting": False} for h in handels_new])
        # pprint.pprint(handles)
        # new_seeds = list(set(new_seeds + handles))
    new_seeds = py_(new_seeds).group_by("handle").to_pairs().map(
        lambda p: {"handle": p[0], "origin": py_.map(p[1], "origin")}).value()
    return new_seeds
def get_context(word):
    if not word:
        return
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"
    }
    res = requests.get(f'https://dict.leo.org/englisch-deutsch/{word}', headers=headers)
    soup = BeautifulSoup(res.content, 'html.parser')
    result = soup.select_one('div[data-dz-search="result"]')
    # TODO: suggestion
    search_term = result.get('data-leo-search-term') or word
    word_info = dict(word=search_term, en=[], de=[])
    tables = result.select('div > table')
    word_info['titles'] = ['english', 'deutsch', 'korean', 'spanish', 'french']
    word_info['results'] = []
    for table in tables:
        for entry in table.select('tbody > tr[data-dz-ui="dictentry"]'):
            row = [''] * 5
            for i, lang in enumerate(['en', 'de']):
                data = entry.select_one(f'td[lang="{lang}"]')
                if data:
                    row[i] = data.text
            word_info['results'].append(row)
        if word_info['results']:
            break

    # example sentences
    for entry in result.select(
            '[data-dz-name="example"] > table > tbody > tr[data-dz-ui="dictentry"]'):
        row = [''] * 5
        for i, lang in enumerate(['en', 'de']):
            data = entry.select_one(f'td[lang="{lang}"]')
            if data:
                row[i] = data.text
        word_info['results'].append(row)

    # korean, spanish, french
    if word_info['results']:
        languages = ('ko', 'es', 'fr')
        result_list = translate(py_.map(word_info['results'], 0), languages)
        for i, results in enumerate(result_list):
            for j, res in enumerate(results):
                word_info['results'][j][2 + i] = res.text

    # TODO: prepend the article to search_term
    # https://pixabay.com/api/docs/
    res = requests.get('https://pixabay.com/api/',
                       params=dict(key='10332400-1448498582be2b2e5a39c04ca',
                                   q=search_term,
                                   lang='de',
                                   per_page=12))
    word_info['images'] = py_.map(
        res.json()['hits'],
        lambda x: dict(preview=x['previewURL'], large=x['largeImageURL']))
    return dict(word_info=word_info)
def get_queryset(self):
    return super().get_queryset().filter(
        aito_id__in=py_.map(self.top_recommendation, 'placeID')
    )
def map(self, callback=None):
    """Map `callback` to each item returned by :meth:`all`."""
    return py_.map(self.all(), callback)
def run():
    if request.form.get("email") is not None:
        db_manager.update("auth_users",
                          {"social_id": current_user.social_id},
                          {"$set": {"email": request.form.get("email")}})

    experiment = {}
    hubs = []
    original_experiment_id = request.args.get('experiment')
    if original_experiment_id is not None:
        original_experiment = dict(list(db_manager.find(
            "experiment", {"_id": ObjectId(original_experiment_id)}))[0])

    experiment["user_id"] = current_user._id
    configuration.access_token = current_user.access_token
    configuration.access_token_secret = current_user.access_token_secret
    configuration.consumer_key = configuration.providers["twitter"]["id"]
    configuration.consumer_secret = configuration.providers["twitter"]["secret"]

    # Get seeds and expert types
    if original_experiment_id is not None:
        seeds = request.form.getlist("accepted")
        experiment["original_experiment"] = ObjectId(original_experiment_id)
    # elif "recipe" in request.form:
    #     seeds = recipe["seeds"]
    elif request.files["input_seeds"].filename == '':
        seeds = [v for k, v in request.form.items() if "prof" in k]
        pprint.pprint(seeds)
    else:
        seeds_file = request.files["input_seeds"]
        seeds_dataframe = pd.read_csv(seeds_file)
        seeds = seeds_dataframe.ix[:, 0].tolist()[:20]

    if original_experiment_id is not None:
        hubs = request.form.getlist("accepted-hubs")
        experiment["original_experiment"] = ObjectId(original_experiment_id)
    # elif "recipe" in request.form:
    #     seeds = recipe["seeds"]
    elif request.files["input_hubs"].filename == '':
        hubs = [v for k, v in request.form.items() if "hub" in k and v != ""]
        pprint.pprint(hubs)
    else:
        hubs_file = request.files["input_hubs"]
        hubs_dataframe = pd.read_csv(hubs_file)
        hubs = hubs_dataframe.ix[:, 0].tolist()[:20]

    if original_experiment_id is not None:
        experts = original_experiment["expert_types"]
    # elif "recipe" in request.form:
    #     experts = recipe["expertTypes"]
    elif request.files["input_expert"].filename == '':
        experts = [v for k, v in request.form.items() if "check-box" in k]
    else:
        expert_file = request.files["input_expert"]
        expert_dataframe = pd.read_csv(expert_file)
        experts = expert_dataframe.ix[:, 0].tolist()

    # Add DBpedia types to the seeds and check the Dandelion rate limit
    new_seeds = []
    join_seeds = tweets_chunk.TweetsChunk([{"text": s} for s in seeds])
    datatxt = EntityExtraction(app_id=configuration.APP_ID,
                               app_key=configuration.API_KEY_DANDELION)
    res = datatxt.nex(join_seeds.get_unique_string(),
                      **{"include": ["types", "categories", "abstract", "alternate_labels"],
                         "social.hashtag": True,
                         "social.mention": True,
                         "min_confidence": 0})
    join_seeds.split_annotation_each_tweet(res["annotations"])
    for tweet in join_seeds.index_tweet:
        ann = tweet.get("annotations", [])
        if tweet['tweet']['text'] != "dummy":
            if len(ann) != 0:
                new_seeds.append({"handle": tweet["tweet"]["text"],
                                  "types": ann[0].get("types", [])})
            else:
                new_seeds.append({"handle": tweet["tweet"]["text"], "types": []})
    # End add DBpedia

    if original_experiment_id is not None:
        experiment["title"] = "Rerun of " + original_experiment["title"]
    else:
        experiment["title"] = request.form["title"]
    experiment["email"] = list(db_manager.find(
        "auth_users", {"social_id": current_user.social_id}))[0]["email"]
    experiment["access_token"] = current_user.access_token
    experiment["access_token_secret"] = current_user.access_token_secret
    experiment["consumer_key"] = configuration.providers["twitter"]["id"]
    experiment["consumer_secret"] = configuration.providers["twitter"]["secret"]
    experiment["expert_types"] = experts
    # experiment["tags"] = request.form.get("tags", [])
    experiment["status"] = "PROCESSING"
    experiment["creationDate"] = datetime.now()

    id_experiment = db_manager.write_mongo("experiment", dict(experiment))
    if int(datatxt.units_left) < configuration.MIN_REQUEST_DANDELION_NEEDED:
        print("error")
        error = "No units left in our Dandelion keys! Please insert your own key"
        flash(error, 'error')
        return render_template('index.html', title='Error')
    else:
        # TODO: create inside orchestrator
        db_manager.store_seeds(py_.map(new_seeds, "handle"), id_experiment)
        db_manager.store_hubs(hubs, id_experiment)
        isHub = len(hubs) > 0
        crawler = PipelineCrawler(100, id_experiment, db_manager, isHub)
        knowldege_extractor = Pipeline(db_manager, id_experiment)
        # orchestrator = Orchestrator(crawler, knowldege_extractor, id_experiment)
        threading.Thread(
            target=Orchestrator,
            args=(crawler, knowldege_extractor, id_experiment, db_manager),
        ).start()
        return render_template('redirect.html', title='Completed Request')
def home(request):
    url_prefix = "https://dongyoungsang.club"
    url = url_prefix + "/bbs/board.php"
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"
    }
    category_key = 'bo_table'
    params = {category_key: 'en', 'page': "1"}
    params.update(**{k: v for k, v in request.GET.items()})
    html = requests.get(url, params=params, headers=headers)
    soup = BeautifulSoup(html.content, 'html.parser')

    # pagination
    pagination = []
    for anchor in soup.find("ul", class_="pagination").find_all('a'):
        href = anchor.get('href')
        if not href or anchor.find('i'):
            continue
        pagination.append(dict(
            text=anchor.text,
            active=params['page'] == anchor.text,
            query=href.split('?')[-1],
            blank_url=url_prefix + href if '이전자료검색' in anchor.text else None
        ))

    # menu
    menu_anchor = py_.find(soup.select('div#nt_body a.list-group-item'),
                           lambda a: a.text == '다시보기')
    menu_list = py_(menu_anchor.find_next_siblings('a')).map(
        lambda a: (a.get('href').strip('/'), a.text)
    ).filter(
        # TODO: include movie/ani
        lambda m: '/' not in m[0]
    ).value()

    # content list
    rows = soup.select('ul.bo_list a#link')
    query_set = {f"{k}={quote_plus(v)}" for k, v in request.GET.items()}
    current = None
    content_list = []
    for row in rows:
        content = dict(title=row.text, query=row.get('href').split('?')[-1])
        content['query_set'] = set(content['query'].split('&'))
        content['is_current'] = query_set == content['query_set']
        if content['is_current']:
            current = content
        content_list.append(content)

    # current content
    if current:
        body = str(soup.select_one('div#bo_v_con > div:nth-of-type(2)') or '')
        content_anchor = soup.select_one('section#bo_v_atc > div:nth-of-type(2) > a.btn')
        content_href = content_anchor.get('href')
        content_url = url_prefix + content_href
        response = requests.get(content_url)
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.select('section#bo_v_atc a.btn')
        re_link = re.compile(r'(href=[\'"])(https?://[^?]+\?(https?://[^\'"]+))')
        links = py_.map(links, lambda a: re_link.sub(r'\1\3', str(a)))
        current.update(body=body, links=links, source=content_url)

    context = dict(
        menu_list=menu_list,
        category_key=category_key,
        category=params.get(category_key) or '',
        content_list=content_list,
        current=current,
        pagination=pagination
    )
    return render(request, 'home.html', context)
def geo_record_detail(record=None, files=None, pid_value=None, is_preview=False):
    """Record detail page (aka landing page)."""
    # Base definitions
    files_data = None if files is None else files.to_dict()
    record_data = record.to_dict()
    record_ui = UIJSONSerializer().serialize_object_to_dict(record_data)

    # General record properties
    record_is_draft = record_ui.get("is_draft")

    # Start - Temporary block: build the Knowledge Package and Knowledge Resource context
    # into the record landing page. This block will be replaced with the
    # Knowledge Package Context API when it is implemented.
    # Note: We use functions in order to organize the package-building workflow.

    # Related records
    all_related_records_informations = get_related_resources_metadata(record)

    # Extract user stories
    related_records_informations, user_stories = extract_user_stories(
        all_related_records_informations
    )

    # Identifiers
    related_identifiers = get_related_identifiers_url(
        record,
        doi_prefix=current_app.config.get("DATACITE_PREFIX", None),
    )

    # Remove every related resource that is a knowledge resource
    related_identifiers = filter_knowledge_resources_from_related_identifiers_url(
        related_identifiers,
        py_.map(
            all_related_records_informations,
            lambda y: y["id"],
        ),
    )

    # Engagement priorities
    related_engagement_priorities = get_engagement_priority_from_record(
        g.identity, record
    )

    # GEO Work Programme activities
    programme_activity = get_programme_activity_from_record(g.identity, record)

    # Prepare the subject (including engagement priorities and target users)
    record_topics = prepare_record_topics(record_ui, related_engagement_priorities)
    # End - Temporary block

    return render_template(
        "geo_knowledge_hub/records/detail.html",
        pid=pid_value,
        record=record_ui,
        files=files_data,
        is_draft=record_is_draft,
        is_preview=is_preview,
        related_identifiers=related_identifiers,
        user_stories=user_stories,
        record_topics=record_topics,
        programme_activity=programme_activity,
        related_records_informations=related_records_informations,
        related_engagement_priorities=related_engagement_priorities,
        permissions=record.has_permissions_to(
            ["edit", "new_version", "manage", "update_draft", "read_files"]
        ),
    )