import os

from bs4 import BeautifulSoup

# LEAGUE_ID (str) and UPDATE (bool) are assumed to be module-level settings;
# scrape() is the fetch-and-cache helper sketched below GetScores().


def GetScores():
    scores = {}
    # First 12 weeks
    for week_num in range(12):
        week = str(week_num + 1)
        url = ('https://fantasy.nfl.com/league/' + LEAGUE_ID +
               '/team/3/gamecenter?week=' + week)
        filename = (os.path.dirname(os.path.realpath(__file__)) +
                    '/data/scores' + week + '.txt')
        # Scrape fresh data, if desired
        if UPDATE:
            scrape(url, filename)
        with open(filename, 'r') as f:
            txt = f.read()
        html = BeautifulSoup(txt, 'html.parser')
        totals = html.find_all('span', {'class': 'teamTotal'})
        week_scores = {}
        for t in totals:
            # The team ID is the last character of the element's second CSS class
            team_id = t['class'][1][-1]
            week_scores[team_id] = float(t.text)
        scores[week] = week_scores
    return scores

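# scrape(url, filename) is not defined in this section. Below is a minimal
# sketch of the assumed fetch-and-cache behavior, using requests; note that
# fantasy.nfl.com league pages normally require an authenticated session,
# which this sketch omits.
import requests


def scrape(url, filename):
    # Hypothetical helper: fetch the page and cache the raw HTML to disk so
    # the Get* loaders can re-parse it without re-downloading.
    response = requests.get(url)
    response.raise_for_status()
    with open(filename, 'w') as f:
        f.write(response.text)
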
def convertUrlsToFeatures(urls):
    # Build one feature vector per URL using the preloaded model, vectorizers,
    # and lexicons held in module-level globals.
    features = []
    for url in urls:
        result = helpers.scrape(url)
        print(result[1])
        features.append(helpers.start(
            result[0], result[1], result[2], cat_dict, stem_dict, counts_dict,
            loaded_model, count_vect, tfidf_transformer, bias, assertives,
            factives, hedges, implicatives, report_verbs, positive_op,
            negative_op, wneg, wpos, wneu, sneg, spos, sneu))
    features = np.array(features)
    return features

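# A hypothetical call, assuming the module-level model and lexicon globals
# (loaded_model, cat_dict, and the rest) are already initialized; the URLs
# are placeholders.
urls = [
    'https://example.com/article-one',
    'https://example.com/article-two',
]
X = convertUrlsToFeatures(urls)
print(X.shape)  # one row of features per scraped URL
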
def GetRecords():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/records.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('td', {'class': 'teamWinPct'})
    records = {}
    for d in data:
        # Key on the team ID taken from the parent row's first CSS class
        records[d.parent['class'][0][-1]] = d.text
    return records

def GetTeams():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/teams.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('a', {'class': 'teamName'})
    teams = {}
    for d in data:
        teams[d['class'][1][-1]] = d.text
    return teams

def GetManagers():
    url = 'https://fantasy.nfl.com/league/' + LEAGUE_ID + '/owners'
    filename = os.path.dirname(os.path.realpath(__file__)) + '/data/managers.txt'
    # Scrape fresh data, if desired
    if UPDATE:
        scrape(url, filename)
    with open(filename, 'r') as f:
        txt = f.read()
    html = BeautifulSoup(txt, 'html.parser')
    data = html.find_all('a', {'class': 'teamName'})
    managers = {}
    for d in data:
        # Walk up two levels from the team link, then take the text of the
        # next sibling element, which holds the manager's name
        manager = d.parent.parent.nextSibling.text
        managers[d['class'][1][-1]] = manager
    return managers

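# The three loaders key their results on the same single-character team ID,
# so they can be joined into a quick standings printout. A minimal usage
# sketch, assuming LEAGUE_ID and UPDATE are configured.
teams = GetTeams()
managers = GetManagers()
records = GetRecords()

for team_id, name in sorted(teams.items()):
    print('{} ({}): {}'.format(
        name, managers.get(team_id, '?'), records.get(team_id, '?')))
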
def setSnippetWeightedKeywords(snippetId):
    '''
    Find, weight, and save related bigrams and trigrams for the selected snippet

    Args:
        snippetId (int): Snippet ID

    Returns:
        String summary of the process
    '''
    snippet = getSnippetById(snippetId)
    print('Found snippet id [', snippetId, ']')
    searchQuery = snippet.get('searchQuery', snippet.get('title', ''))
    urls = getQueryUrls(searchQuery, snippet['language'])
    print('Found {} search results for [{}]'.format(len(urls), searchQuery))
    print(urls)
    # Skip PDFs; this module's scrape() takes a language and returns the
    # extracted article text as its first element
    articles = [scrape(url, snippet['language'])[0] for url in urls
                if not re.compile(r'\.pdf$', re.M | re.I).search(url)]
    print('Scraped {} articles'.format(len(articles)))
    if len(articles):
        # Build TF-IDF matrix over unigrams, bigrams, and trigrams
        stopWords = getStopWords(snippet['language'])
        vectorizer = TfidfVectorizer(
            max_df=0.6, min_df=0.2, ngram_range=(1, 3), lowercase=True,
            max_features=config.snippetsKeywordsBuilder['maxFeatures'],
            stop_words=stopWords)
        TfIdfMatrix = vectorizer.fit_transform(articles)
        inspectMatrix(TfIdfMatrix, vectorizer)
        topFeatures = getTopNFeatures(vectorizer, TfIdfMatrix)
        print('Top features are:', topFeatures)
        # topFeatures is ordered best-first, so topFeatures[0][1] is the max
        topFeaturesNormalized = [(feature[0], feature[1] / topFeatures[0][1])
                                 for feature in topFeatures]
        print('Top features normalized:', topFeaturesNormalized)
        weightedNGrams = getPhrases(topFeaturesNormalized, articles,
                                    snippet['language'])
        # print('N grams:', weightedNGrams)
        saveSnippetKeywords(snippet, weightedNGrams)
        return resultSummary(weightedNGrams, snippetId)
    else:
        return 'no relevant articles found'

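# getTopNFeatures() is not shown in this section. Given how its result is
# used above (an ordered list of (feature, score) pairs whose first entry
# holds the maximum score), it plausibly ranks terms by aggregate TF-IDF
# weight; the sketch below is written under that assumption and is not the
# project's actual implementation.
import numpy as np


def getTopNFeatures(vectorizer, tfIdfMatrix, n=20):
    # Assumed behavior: sum each term's TF-IDF weight over all articles and
    # return the n highest-scoring (term, score) pairs, best first.
    totals = np.asarray(tfIdfMatrix.sum(axis=0)).ravel()
    terms = vectorizer.get_feature_names_out()
    ranked = sorted(zip(terms, totals), key=lambda p: p[1], reverse=True)
    return ranked[:n]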