def main(args):
    movie_name = input("Movie name: ")

    ##### IMPORT DATA ######
    movie_words, unique_words = import_words("context_analysis/data/" + movie_name + "_words_warped.txt")
    google_args = ["google/data/" + movie_name + "_gc_full_labels_1.json",
                   "google/data/" + movie_name + "_gc_full_labels_2.json",
                   "google/data/" + movie_name + "_gc_full_labels_3.json"]
    gc_labels, unique_gc_labels = import_google_labels(google_args)
    aws_labels, unique_aws_labels = import_aws_labels("context_analysis/data/" + movie_name + "_aws_labels.json")

    ##### INITIALISE MODELS ######
    liteClient = retinasdk.LiteClient("557d9940-40ab-11e8-9172-3ff24e827f76")
    semantic_model = gensim.models.KeyedVectors.load_word2vec_format(
        'GoogleNews-vectors-negative300.bin', binary=True, limit=500000)

    ##### Vectorize words ######
    # returns embedding vectors for every word in the w2v vocabulary
    # word2vec_whole_vectors = w2vec_vocab(semantic_model)
    word2vec_word_vectors, unknown_words = word2vec_generate_vectors(semantic_model, unique_words)
    word2vec_gc_vectors, unknown_gc_labels = word2vec_generate_vectors(semantic_model, unique_gc_labels)
    word2vec_aws_vectors, unknown_aws_labels = word2vec_generate_vectors(semantic_model, unique_aws_labels)

    ##### Make sure numbers add up ######
    print("Word vectors:", len(word2vec_word_vectors))
    print("GC vectors:", len(word2vec_gc_vectors))
    print("AWS vectors:", len(word2vec_aws_vectors))
    all_labels_vectors = np.concatenate((word2vec_word_vectors, word2vec_gc_vectors, word2vec_aws_vectors))
    print("All vectors:", len(all_labels_vectors))
    # retina_word_vectors = retina_generate_vectors(liteClient, words)

    ##### CLUSTER ESTIMATION #######
    # estimate_elbow_cluster_num(all_labels_vectors)
    # for i in range(30, 36):
    #     print("Num of clusters " + str(i))
    #     estimate_silhou_cluster_num(all_labels_vectors, i)

    #### PCA #######
    # do_PCA(all_labels_vectors)

    #### CLUSTERING METHODS #######
    # cluster_kmeans(all_labels_vectors, semantic_model)
    # cluster_agglom(word2vec_word_vectors, semantic_model)
    cluster_components = cluster_agglom(all_labels_vectors, semantic_model)
    clusters_across_time(cluster_components, movie_words, gc_labels, aws_labels)
    print("Done")
def predict(claim, source='All'):
    lite_client = retinasdk.LiteClient("2bc45a70-3a85-11e8-9172-3ff24e827f76")

    def get_news_titles(claim, keywords):
        # Mark claim words that are also keywords with a leading "+"
        # so the news search treats them as required terms.
        keyword_words = set(" ".join(keywords).split())
        new_claim = ""
        for word in claim.split():
            if word in keyword_words:
                new_claim += " +" + word
            else:
                new_claim += " " + word
        news_titles = []
        news_api = search_news_api(new_claim)
        if news_api is not None:
            news_titles.append(news_api)
        return news_titles

    keywords = lite_client.getKeywords(claim)
    news = get_news_titles(claim.lower(), keywords)
    if not news:
        return -1
    # Flatten nested lists of titles into a single flat list of strings.
    if not isinstance(news[0], str):
        news = [title for batch in news for title in batch]
    count_agree = 0
    count_disagree = 0
    for title in news:
        test_sim = paralleldots.similarity(claim, title)
        score = test_sim["actual_score"]
        if score > 0.5:
            count_agree += 1
        else:
            count_disagree += 1
    if count_agree + count_disagree == 0:
        return -1
    probability = (count_agree / (count_agree + count_disagree)) * 100
    return probability
def hammingCompare(outtweets, innerTwitter):
    client = retinasdk.FullClient(apiKey.retina_token,
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")
    liteClient = retinasdk.LiteClient(apiKey.retina_token)
    res = []
    for index, outtweet in enumerate(outtweets):
        # get simHash
        simhash_pair = getSimHash(outtweet[2], innerTwitter, client)
        if len(simhash_pair) > 1:
            diff_bits = simhash.num_differing_bits(simhash_pair['out_hash'],
                                                   simhash_pair['in_hash'])
            hashes = [simhash_pair['out_hash'], simhash_pair['in_hash']]
            blocks = 4    # number of blocks to use
            distance = 3  # number of bits that may differ in matching pairs
            matches = simhash.find_all(hashes, blocks, distance)
            res.append([index, outtweet[2], matches])
    return res
import argparse
import urllib2
from bs4 import BeautifulSoup
import itertools
import retinasdk
import credentials

liteClient = retinasdk.LiteClient(credentials.API_KEY)


def init():
    parser = argparse.ArgumentParser()
    parser.add_argument('document_url', metavar='url', type=str,
                        help='Document url to check for on snopes.com')
    parser.add_argument(
        '--w', metavar='words_cutoff', type=int, default=7,
        help='Cuts off lines with fewer than n words for keyword extraction (default=7)')
    parser.add_argument(
        '--k', metavar='keywords_cutoff', type=int, default=4,
        help='Consider only the first n relevant keywords (default=4)')
    parser.add_argument('--o', metavar='no_single_keyword',
with open(filename) as filehandle:
    lines = filehandle.readlines()

# rewrite the file with blank lines removed
with open(filename, 'w') as filehandle:
    lines = filter(lambda x: x.strip(), lines)
    filehandle.writelines(lines)


# string to list converter
def Convert(string):
    return string.split("-")


# retinasdk client connection
liteClient = retinasdk.LiteClient("1d5b1cc0-aa65-11e9-8f72-af685da1b20e")

#################################################################################
# make sure the directory is cleaned from the last run
for leftover in ("clean.txt", "keywords.txt", "temp.txt", "outfile.txt"):
    if os.path.isfile(leftover):
        os.remove(leftover)
import retinasdk

liteClient = retinasdk.LiteClient("")


def get_topic(titles):
    # Concatenate all titles into one paragraph and extract its keywords.
    paragraph = ""
    for title in titles:
        paragraph += title
    keywords = liteClient.getKeywords(paragraph.encode('utf-8'))
    # Return the index of the first title containing the top keyword.
    for index, title in enumerate(titles):
        if keywords[0] in title.lower():
            return index
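# A minimal usage sketch, not part of the original snippet; the titles are
# invented for illustration, and a valid API key must be supplied above.
# get_topic returns the index of the first title containing the top keyword
# of all titles taken together.
if __name__ == "__main__":
    sample_titles = ["Markets rally as trade talks resume",
                     "Investors cheer new trade agreement"]
    print(get_topic(sample_titles))  # e.g. 0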
def keyword_classifier(self, sentence):
    liteClient = retinasdk.LiteClient("ba4c1950-95ec-11e8-917d-b5028d671452")
    return liteClient.getKeywords(sentence)
def match_texts(text_1, text_2):
    """Match two texts via Cortical.io fingerprints and write a report."""
    text_1_raw = text_1
    text_2_raw = text_2
    text_1 = uni_to_ascii(text_1)
    text_2 = uni_to_ascii(text_2)
    uuid_str = str(uuid4())
    liteClient = retinasdk.LiteClient(RETINA_API_KEY)
    text_1_md5 = md5.new(text_1).hexdigest()
    text_2_md5 = md5.new(text_2).hexdigest()
    sim, text_1_fp, text_2_fp = "", "", ""
    text_1_keywords, text_2_keywords = "", ""
    try:
        text_1_fp = liteClient.getFingerprint(text_1)
    except CorticalioException as exc:
        print(str(exc) + "\nText:" + text_1)
    try:
        text_2_fp = liteClient.getFingerprint(text_2)
    except CorticalioException as exc:
        print(str(exc) + "\nText:" + text_2)
    try:
        sim = str(liteClient.compare(text_1_fp, text_2_fp))
    except CorticalioException as exc:
        print(exc)
    try:
        text_1_keywords = liteClient.getKeywords(text_1)
    except CorticalioException as exc:
        print(exc)
    try:
        text_2_keywords = liteClient.getKeywords(text_2)
    except CorticalioException as exc:
        print(exc)

    # Fall back to "[None]" placeholders for anything the API could not produce.
    if len(text_1_keywords) == 0:
        text_1_keywords = ["[None]"]
    if len(text_2_keywords) == 0:
        text_2_keywords = ["[None]"]
    if text_1_fp == "":
        text_1_fp = ["[None]"]
    if text_2_fp == "":
        text_2_fp = ["[None]"]
    if sim == "" or text_1_fp == ["[None]"] or text_2_fp == ["[None]"]:
        sim = "[None]"

    # Build a .docx report with the similarity score and matching details.
    document = Document()
    style = document.styles['Normal']
    font = style.font
    font.name = 'Calibri'
    font.size = Pt(12)
    document.add_heading(DOCUMENT_HEADING, 0)
    document.add_paragraph('Text 1 and Text 2 similarity', style='Intense Quote')
    document.add_paragraph(sim)
    document.add_paragraph('Matching details', style='Intense Quote')
    table = document.add_table(rows=4, cols=3)
    table.style = 'Table Grid'
    font.bold = True
    table.cell(0, 1).text = "Text 1"
    table.cell(0, 2).text = "Text 2"
    table.cell(1, 0).text = "md5"
    table.cell(2, 0).text = "Keywords"
    table.cell(3, 0).text = "Fingerprint"
    font.bold = False
    table.cell(1, 1).text = text_1_md5
    table.cell(1, 2).text = text_2_md5
    table.cell(2, 1).text = ", ".join(text_1_keywords)
    table.cell(2, 2).text = ", ".join(text_2_keywords)
    table.cell(3, 1).text = ", ".join([str(el) for el in text_1_fp])
    table.cell(3, 2).text = ", ".join([str(el) for el in text_2_fp])
    document.add_page_break()
    document.add_heading('Original Text 1', 0)
    document.add_paragraph(get_clean_text(text_1_raw))
    document.add_page_break()
    document.add_heading('Original Text 2', 0)
    document.add_paragraph(get_clean_text(text_2_raw))
    document.save(RESULTS_FOLDER + "/" + uuid_str + FILES_EXTENTION)

    # Persist both texts and their fingerprints to the database.
    db = create_engine(SME_SQLALCHEMY_DATABASE_URI)
    db.echo = False
    metadata = MetaData(db)
    text_table = Table('text', metadata, autoload=True)
    text_table.insert().execute(text_md5=text_1_md5,
                                text_keywords=", ".join(text_1_keywords),
                                text_fingerprint=", ".join([str(el) for el in text_1_fp]),
                                text=text_1)
    text_table.insert().execute(text_md5=text_2_md5,
                                text_keywords=", ".join(text_2_keywords),
                                text_fingerprint=", ".join([str(el) for el in text_2_fp]),
                                text=text_2)
    return uuid_str
# %%
from typing import List

import retinasdk
import matplotlib.pyplot as plt
import numpy as np
import json

from preprocessing.learned_lemmatization_experiments.past_tense_from_w2v import past_tense

# %%
api_key = json.load(
    open("./preprocessing/learned_lemmatization_experiments/cortico_api_key.json",
         "r")).get("key")
liteClient = retinasdk.LiteClient(api_key)


# %%
def embed(word: str) -> List[int]:
    # A Cortical.io fingerprint is a sparse list of active bit positions.
    fingerprint = liteClient.getFingerprint(word)
    return fingerprint


def to_array(fingerprint: List[int]):
    # Expand the sparse fingerprint into a dense 128x128 binary grid.
    dim = 128
    x = np.zeros(dim**2)
    x[fingerprint] = 1
    x = x.reshape((dim, dim))
    return x
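# %%
# A minimal usage sketch, not in the original notebook cell: render the
# fingerprint of a word as a 128x128 binary image. Assumes the API key
# loaded above is valid; the word "run" is an arbitrary example.
fingerprint_grid = to_array(embed("run"))
plt.imshow(fingerprint_grid, cmap="gray")
plt.title("Semantic fingerprint of 'run'")
plt.show()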
import json

import retinasdk

from apiStorage import apiKey

# helper functions

# reusable clients for handling API calls
sFunctionFullClient = retinasdk.FullClient(
    apiKey, apiServer="http://api.cortical.io/rest", retinaName="en_synonymous")
aFunctionFullClient = retinasdk.FullClient(
    apiKey, apiServer="http://api.cortical.io/rest", retinaName="en_associative")
FunctionLiteClient = retinasdk.LiteClient(apiKey)


# input:  category - a fingerprint of the category filter
#         term - the term you want to add to the category
# output: the resulting fingerprint of assimilating the given term
def assimilateTermInCategory(category, term):
    orExpression = {"or": [{"positions": category}, {"term": term}]}
    return sFunctionFullClient.getFingerprintForExpression(
        json.dumps(orExpression)).positions


# input:  FP1 - fingerprint 1 to be merged with FP2
#         FP2 - fingerprint 2
# output: the resulting fingerprint of merging FP1 and FP2
def mergeFingerprints(FP1, FP2):
import math
from gcc_phat import gcc_phat  # generalized cross correlation phase transform
import numpy as np
import json
import simpleaudio as sa
import sys
import speech_recognition as sr
import pyaudio

# API key for Cortical.io - keyword parser
# use retina_sdk_florida.txt for Florida
import retinasdk

liteClient = None
with open("api_keys/retina_sdk.txt") as f:
    key = f.read().strip()
    liteClient = retinasdk.LiteClient(key)

# for Watson sentiment analysis
from watson_developer_cloud.natural_language_understanding_v1 \
    import Features, EntitiesOptions, KeywordsOptions, SentimentOptions
from watson_developer_cloud import NaturalLanguageUnderstandingV1

naturalLanguageUnderstanding = None
with open("api_keys/watson.txt") as f:
    key = f.read().strip()
    naturalLanguageUnderstanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16', iam_apikey=key)
# use watson_florida.txt for Florida

# for communication with other programs
import time
import requests
import math
import json
import argparse
import urllib.request
import itertools
import retinasdk
import credentials
from bs4 import BeautifulSoup
from selenium import webdriver

# set parameters
liteClient = retinasdk.LiteClient("2fe758f0-8670-11e8-917d-b5028d671452")
DANDELION_APP_ID = '159581bc091046b28e91a97ca4d5032f'
DANDELION_APP_KEY = '159581bc091046b28e91a97ca4d5032f'
ENTITY_URL1 = 'https://api.dandelion.eu/datatxt/nex/v1'
ENTITY_URL2 = 'https://api.dandelion.eu/datatxt/sent/v1'


# ==============================
# task1 form check
# ==============================
def get_entities(text, confidence=0.5, lang='en'):
    payload = {
        '$app_id': DANDELION_APP_ID,
        '$app_key': DANDELION_APP_KEY,
        'text': text,
        'confidence': confidence,
        'lang': lang,
import subprocess, re, json, random
import retinasdk
from sentence_selection_v2 import compute_document_score

"""
[{
    original_sentence: "original sentence",
    gap_sentence: "sentence with gap",
    distractors: [],
    answer: 0
}]
"""

liteClient = retinasdk.LiteClient("f12af3f0-3a0d-11e8-9172-3ff24e827f76")


def distractor_selection():
    pass


def gapify(sentences):
    # get the keywords for every sentence
    keywords = []
    for sentence in sentences:
        keywords.append(liteClient.getKeywords(sentence))

    # get rid of gap and find selector
    gap_questions = []
    for i, sentence in enumerate(sentences):
        for k, key in enumerate(keywords[i]):
#!/usr/bin/env python3
import retinasdk

liteClient = retinasdk.LiteClient("6ea5d540-4fb8-11ea-8f72-af685da1b20e")


def get_key_words(text):
    return liteClient.getKeywords(text)
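# A minimal usage sketch, not part of the original script; the sample
# sentence is invented. getKeywords returns a list of keyword strings
# extracted by the Cortical.io retina.
if __name__ == "__main__":
    print(get_key_words("Cortical.io retinas map text to sparse binary fingerprints."))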
import retinasdk
from misc import bcolors, testfiles
import json
import pandas as pd
import numpy as np
from sys import stdout

liteClient = retinasdk.LiteClient("e29fcfe0")
fullClient = retinasdk.FullClient("your_api_key",
                                  apiServer="http://api.cortical.io/rest",
                                  retinaName="en_associative")


def compare_texts(texts1, texts2):
    print(bcolors.HEADER +
          "Compute similarity between sentences in dataframes:" +
          bcolors.ENDC)
    cosines = []
    i = 0
    l = len(texts1)
    for s1, s2 in zip(texts1, texts2):
        percent = i / l * 100
        stdout.write("\r{0:.3f} %".format(percent))
        stdout.flush()
        cosines.append(
            fullClient.compare(json.dumps([{"text": s1},
                                           {"text": s2}])).cosineSimilarity)
        i += 1  # advance the progress counter
import nltk
nltk.download('averaged_perceptron_tagger')
from nltk.sentiment.vader import SentimentIntensityAnalyzer as sid
#from random import *
import simpleaudio as sa
import json
#import client
#import socket
#import time
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
    import Features, EntitiesOptions, KeywordsOptions, SentimentOptions
import retinasdk

#apiKey = "69ba0c10-5e17-11e9-8f72-af685da1b20e"
apiKey = "f09d0fe0-3223-11e9-bb65-69ed2d3c7927"  # FOR DEMO DAY ONLY
liteClient = retinasdk.LiteClient(apiKey)

import threading
from threading import Lock, Thread

lock = Lock()

naturalLanguageUnderstanding = NaturalLanguageUnderstandingV1(
    version='2018-11-16',
    iam_apikey='_wxBEgRMBJ_WzXRWYzlTLYrNp3A0mmYEjKp-UQsdhvap')

setup_bool = False
confirmation_final = 1000
no_clue_final = 999
wakeup_final = 998
sleep_final = 997
move_final = 996
def setupCio():
    """Setup Cortical.io clients."""
    apiKey = os.environ.get("CORTICAL_API_KEY")
    cioFullClient = retinasdk.FullClient(apiKey)
    cioLiteClient = retinasdk.LiteClient(apiKey)
    return cioFullClient, cioLiteClient
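# A minimal usage sketch, not part of the original module; assumes os and
# retinasdk are imported by the surrounding module and that CORTICAL_API_KEY
# is set in the environment.
if __name__ == "__main__":
    fullClient, liteClient = setupCio()
    print(liteClient.getKeywords("Sparse distributed representations encode meaning."))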
def txt_comp(answer_txt, key_txt):
    liteClient = retinasdk.LiteClient("4e5305c0-50e8-11ea-8f72-af685da1b20e")
    return liteClient.compare(answer_txt, key_txt)
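# A minimal usage sketch, not part of the original snippet; the two texts are
# invented. LiteClient.compare returns a cosine-similarity score for the two
# texts' fingerprints, higher meaning more semantically similar.
if __name__ == "__main__":
    score = txt_comp("Plants convert sunlight into chemical energy.",
                     "Photosynthesis stores solar energy in sugars.")
    print("similarity:", score)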
import pandas as pd
import retinasdk
from nltk.stem import WordNetLemmatizer
from keras.models import Model
from keras.layers import Input
from sklearn.feature_extraction.text import TfidfVectorizer
from math import isnan
from textdistance import cosine

# assumption: _wnl is the WordNet lemmatizer used by the helpers below
_wnl = WordNetLemmatizer()

pth = "your--path/my_stance_data/"
vectorizer = TfidfVectorizer(max_features=2000)

dfb = pd.read_csv(pth + "train_bodies.csv")
dfh = pd.read_csv(pth + "train_stances.csv")
lb = dfb.values.tolist()
lh = dfh.values.tolist()
voch, vocb = [], []

liteClient = retinasdk.LiteClient("d2690680-f10c-11e8-bb65-69ed2d3c7927")


def lem(l):
    return " ".join(_wnl.lemmatize(w) for w in l.split())


def normalize_word(w):
    return _wnl.lemmatize(w).lower()


def get_tokenized_lemmas(s):