def main():
    links = [[1, 2, 3], [3], [0, 3], [0, 2]]
    result = rank(links)
    print("testing simple")
    print("INPUT:")
    print(links)
    print("OUTPUT:")
    print(result)
    assert (result == [3, 0, 2, 1])
    print("passed!")

    links = [[1, 2, 3], [3], [0, 3], []]
    result = rank(links)
    print("testing dangling")
    print("INPUT:")
    print(links)
    print("OUTPUT:")
    print(result)
    assert (result == [3, 0, 2, 1])
    print("passed!")

    links = [[1, 2, 3], [3, 4], [0, 3], [1, 6], [6], [4, 7], [5], [5, 6]]
    result = rank(links)
    print("testing reducible")
    print("INPUT:")
    print(links)
    print("OUTPUT:")
    print(result)
    assert (result == [5, 6, 4, 7, 3, 1, 0, 2])
    print("passed!")

    links = [[1, 5], [2, 5], [1, 3, 5], [4], [1, 5], [2, 6], [0, 1]]
    result = rank(links)
    print("testing given sample 1")
    print("INPUT:")
    print(links)
    print("OUTPUT:")
    print(result)
    assert (result == [5, 2, 1, 6, 3, 4, 0] or result == [5, 2, 1, 6, 4, 3, 0])
    print("passed!")

    links = [[1, 3, 4], [0, 2, 4], [3, 6], [2, 4, 6], [5, 8],
             [4, 6, 8], [0, 7, 9], [0, 6, 8], [2, 9], [0, 2, 8]]
    result = rank(links)
    print("testing given sample 2")
    print("INPUT:")
    print(links)
    print("OUTPUT:")
    print(result)
    assert (result == [2, 6, 8, 0, 3, 9, 4, 5, 7, 1] or result == [2, 6, 8, 0, 4, 3, 9, 5, 7, 1])
    print("passed!")
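# The tests above assume rank() takes raw adjacency lists (node j links to the
# nodes listed in links[j]) and returns node indices ordered by PageRank score.
# A minimal sketch of one way to bridge that input to the matrix form used by
# the callers below: build a column-stochastic matrix, treating a dangling node
# (empty list) as linking to every node uniformly. This is an assumption about
# the interface, not the project's actual implementation.
import numpy as np

def links_to_matrix(links):
    n = len(links)
    A = np.zeros((n, n))
    for j, outgoing in enumerate(links):
        if outgoing:
            # Column j splits its weight evenly over its outgoing links.
            for i in outgoing:
                A[i, j] = 1.0 / len(outgoing)
        else:
            # Dangling node: distribute its weight uniformly over all nodes.
            A[:, j] = 1.0 / n
    return A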
def getRankings(week, model, teamToIndex, week_multiplier=None):
    # model is (data, indexToTeam, teamToIndex, indexToGamesPlayed); unpack the
    # pieces this function uses instead of relying on module-level names.
    data, indexToTeam, _, _ = model
    A = np.array(getMatrixForSeason(week, model, week_multiplier))
    R = pagerank.rank(A)
    rankings = [(i, R[i]) for i in range(len(indexToTeam))]
    indexToExpectedPoints, indexToRandomPoints, indexToMorePoints = getSeasonStats(
        R, week, data, teamToIndex)
    return indexToExpectedPoints
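# pagerank.rank() itself is not shown in this section. A minimal sketch,
# assuming A is a column-stochastic numpy matrix and the result is an n x 1
# column vector (which matches the R[i][0] indexing used by the callers);
# the damping factor, tolerance, and iteration cap are assumed values.
import numpy as np

def rank(A, damping=0.85, tol=1e-9, max_iter=1000):
    n = A.shape[0]
    # Start from the uniform distribution.
    R = np.full((n, 1), 1.0 / n)
    # Follow a link with probability `damping`, teleport uniformly otherwise.
    M = damping * A + (1.0 - damping) / n
    for _ in range(max_iter):
        R_next = M @ R
        done = np.abs(R_next - R).sum() < tol
        R = R_next
        if done:
            break
    return R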
def f(collected_data, week, canAddInput, homeIndex, awayIndex, homeScore, awayScore):
    if not canAddInput:
        return
    # Also happens in createA.
    # Could happen here, depending on when canAddInput is set.
    # `var` (the key of the current matrix in collected_data) is assumed to be
    # defined in the enclosing scope.
    if var not in collected_data:
        # No matrix yet: fall back to a uniform ranking.
        R = [[1.0 / collected_data['team_count']]
             for i in range(collected_data['team_count'])]
    else:
        A = collected_data[var]
        R = pagerank.rank(A)
    collected_data["inputs"][-1].append(R[homeIndex][0])
    collected_data["inputs"][-1].append(R[awayIndex][0])
def main(csv_file_name='epl-2021.csv', output_file_name='epl-predictions-stats.json'):
    model = epl.getData(csv_file_name)
    data, indexToTeam, teamToIndex, indexToGamesPlayed = model
    predictor = predictions_tensorflow.createPredictGameFunction(csv_file_name)
    indexToPlaceFinishedToTimesFinished, indexToMeanPoints, indexToTeam = probs.calculateProbs(
        10**5, csv_file_name, predictor)
    week = 40

    A = np.array(epl.getMatrixForSeason(week, model, None))
    R = pagerank.rank(A)

    rankings = [(indexToMeanPoints[i], i) for i in range(len(indexToMeanPoints))]
    rankings.sort(reverse=True)

    jsonData = []
    for value in rankings:
        probabilties = indexToPlaceFinishedToTimesFinished[value[1]]
        jsonData.append({
            "name": indexToTeam[value[1]],
            "probability": list(probabilties),
            "expected": value[0],
            "championslegue": probabilties[0] + probabilties[1] + probabilties[2] + probabilties[3],
            "relegated": probabilties[-1] + probabilties[-2] + probabilties[-3],
            "pagerank": R[value[1]][0],
        })

    games_data = probs.calculateProbsOfEachGameInASeason(csv_file_name, predictor)
    with open(output_file_name, 'w') as out:
        out.write(json.dumps({'teams': list(jsonData), 'games': games_data}))
from utils import parse
import pagerank

graph = parse("graph.txt")
pagerank.rank(graph)
if __name__ == "__main__":
    csvFileName = "epl-2021.csv"
    week = 40
    if len(sys.argv) > 1:
        csvFileName = sys.argv[1]
    if len(sys.argv) > 2:
        week = int(sys.argv[2])

    model = getData(csvFileName)
    data, indexToTeam, teamToIndex, indexToGamesPlayed = model

    # Get matrix A and ranking.
    A = getMatrixForSeason(week, model, None)  # createWeekMultiplier(8))
    R = pagerank.rank(A)
    rankings = [(i, R[i]) for i in range(len(indexToTeam))]
    printRankings(rankings, model)

    indexToExpectedPoints, indexToRandomPoints, indexToMorePoints = getSeasonStats(
        R, week, data, teamToIndex)
    expectedPointsRanking = [(i, indexToExpectedPoints[i])
                             for i in range(len(indexToExpectedPoints))]
    randomPointsRanking = [(i, indexToRandomPoints[i])
                           for i in range(len(indexToRandomPoints))]
    morePointsRanking = [(i, indexToMorePoints[i])
                         for i in range(len(indexToMorePoints))]
def select_top(dataset='training'):
    ExcludeSentencesWithNoNamedEntities = True
    ner_tagger = loadStanfordNERTagger()
    meta_regex = re.compile(
        r'^([A-Z]{2,}.{,25}\(.{,25}\))|^([A-Z\s]{2,}(\_|\-))')
    ranked_sentences = pagerank.rank(dataset)
    input_directoryPath = os.path.join('outputs/pagerank_D4', dataset)
    output_directoryPath = os.path.join('outputs/reranker_D4', dataset)
    top_sents = {}
    for topic_id in ranked_sentences.keys():
        sentences = ranked_sentences[topic_id]
        id_part1 = topic_id[:-1]
        id_part2 = topic_id[-1:]
        output_file_name = id_part1 + "-A.M.100." + id_part2 + ".1"
        output_file_path = os.path.join(output_directoryPath, output_file_name)

        # Build the topic's vocabulary, stripping dateline/byline metadata
        # matched by meta_regex.
        vocab = set()
        for sentence in sentences:
            original = sentence.original_sent
            match = re.search(meta_regex, original)
            clean = re.sub(meta_regex, '', original).replace('--', '').lower()
            sentence.original_sent = re.sub(meta_regex, '', original).replace('--', '')
            sentence.original_sent = compress(sentence.original_sent)
            sentence.clean_sent = clean
            splitted = clean.split()
            for word in splitted:
                vocab.add(word.lower())

        # Count how many sentences each vocabulary term appears in.
        vocab_sentences_count = {}
        for word in vocab:
            vocab_sentences_count[word] = 0
        for sentence in sentences:
            unique_terms = set()
            for word in sentence.clean_sent.split():
                unique_terms.add(word)
            for word in unique_terms:
                vocab_sentences_count[word] = vocab_sentences_count[word] + 1

        idf = {}
        for word in vocab:
            idf[word] = 1.0 + math.log(
                float(len(sentences)) / float(vocab_sentences_count[word]))

        chosen_sentences = []
        total_word_count = 0
        for sent_obj in sentences:
            sentence = sent_obj.clean_sent
            if total_word_count + len(sentence.split()) > 100:
                continue
            # Decide whether or not to include the sentence based on the
            # cosine similarity to the sentences already chosen.
            include_sentence = True
            for chosen_sentence_obj in chosen_sentences:
                chosen_sentence = chosen_sentence_obj.clean_sent
                cosine_score = cosine(sentence.lower(), chosen_sentence.lower(), idf)
                if cosine_score > THRESHOLD:
                    include_sentence = False
                    break
            if include_sentence:
                # Exclude sentences with no named entities.
                if ExcludeSentencesWithNoNamedEntities and not hasNamedEntities(
                        ner_tagger, sent_obj.original_sent):
                    logging.info('Ignoring sentence because it does not have named entities')
                    logging.info('Sentence: ' + sent_obj.original_sent)
                    continue
                # Exclude quotes.
                if sent_obj.clean_sent.startswith('\'\'') or sent_obj.clean_sent.startswith('``'):
                    continue
                chosen_sentences.append(sent_obj)
                total_word_count += len(sentence.split())

        with io.open(output_file_path, 'w', encoding='utf8') as outputFile:
            for sentence in chosen_sentences:
                outputFile.write(sentence.original_sent)
                outputFile.write('\n')
            outputFile.flush()
        top_sents[topic_id] = chosen_sentences
    return top_sents
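# The cosine() helper used above is not shown here, and THRESHOLD is assumed to
# be a module-level constant. A minimal sketch under the assumption that it
# computes an idf-weighted cosine similarity over whitespace-separated tokens:
import math
from collections import Counter

def cosine(sent_a, sent_b, idf):
    vec_a = Counter(sent_a.split())
    vec_b = Counter(sent_b.split())
    # Weight raw term counts by idf; terms missing from idf contribute nothing.
    dot = sum(vec_a[w] * vec_b[w] * idf.get(w, 0.0) ** 2
              for w in vec_a if w in vec_b)
    norm_a = math.sqrt(sum((c * idf.get(w, 0.0)) ** 2 for w, c in vec_a.items()))
    norm_b = math.sqrt(sum((c * idf.get(w, 0.0)) ** 2 for w, c in vec_b.items()))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)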
def select_top(dataset='training'):
    print('selecting top...')
    meta_regex = re.compile(
        r'^([A-Z]{2,}.{,25}\(.{,25}\))|^([A-Z\s]{2,}(\_|\-))')
    ranked_sentences = pagerank.rank(dataset)
    input_directoryPath = getDirectoryPath("outputs/pagerank_D3/devtest/")
    output_directoryPath = getDirectoryPath("../../outputs/reranker/devtest/")
    top_sents = {}
    for topic_id in ranked_sentences.keys():
        sentences = ranked_sentences[topic_id]
        id_part1 = topic_id[:-1]
        id_part2 = topic_id[-1:]
        output_file_name = id_part1 + "-A.M.100." + id_part2 + ".1"
        output_file_path = output_directoryPath + "/" + output_file_name

        vocab = set()
        for sentence in sentences:
            original = sentence.original_sent
            match = re.search(meta_regex, original)
            clean = re.sub(meta_regex, '', original).replace('--', '').lower()
            clean = compress(clean)
            sentence.clean_sent = clean
            splitted = clean.split()
            for word in splitted:
                vocab.add(word.lower())

        vocab_sentences_count = {}
        for word in vocab:
            vocab_sentences_count[word] = 0
        for sentence in sentences:
            unique_terms = set()
            for word in sentence.clean_sent.lower().split():
                unique_terms.add(word)
            for word in unique_terms:
                vocab_sentences_count[word] = vocab_sentences_count[word] + 1

        idf = {}
        for word in vocab:
            idf[word] = 1.0 + math.log(
                float(len(sentences)) / float(vocab_sentences_count[word]))

        chosen_sentences = []
        total_word_count = 0
        for sent_obj in sentences:
            sentence = sent_obj.clean_sent
            if len(sentence.strip()) > 0:
                if total_word_count + len(sentence.split()) > 100:
                    continue
                # Decide whether or not to include the sentence based on the
                # cosine similarity to the sentences already chosen.
                include_sentence = True
                for chosen_sentence_obj in chosen_sentences:
                    chosen_sentence = chosen_sentence_obj.clean_sent
                    cosine_score = cosine(sentence.lower(), chosen_sentence.lower(), idf)
                    if cosine_score > THRESHOLD:
                        include_sentence = False
                        break
                if include_sentence:
                    # Exclude quotes.
                    if not sent_obj.clean_sent.startswith('\'\'') and \
                            not sent_obj.clean_sent.startswith('``') and \
                            not sent_obj.clean_sent.startswith('"'):
                        chosen_sentences.append(sent_obj)
                        total_word_count += len(sentence.split())

        with io.open(output_file_path, 'w', encoding='utf8') as outputFile:
            for sentence in chosen_sentences:
                outputFile.write(sentence.clean_sent)
                outputFile.write(' ')
            outputFile.flush()
        top_sents[topic_id] = chosen_sentences
    return top_sents