Example #1
    def EnterKeyword(self):

        window = Toplevel() # Create a sub-window 
        window.title("Tweet Viewer") # Set title

        keyword = self.keyword.get()

        keywords = keyword.split(",") 
        # print(keywords)
        # print(keyword)

        tl.main(keywords)
        print(keywords)
        tweet = StringVar()
        Senti_str = StringVar()
        Confi_str = StringVar()

        # Instantiates a client
        client = language.LanguageServiceClient()

        def getSentiment(input):
            document = types.Document(
                content=input,
                type=enums.Document.Type.PLAIN_TEXT)
            try:
                sentiment = client.analyze_sentiment(document=document).document_sentiment
                print("Successfully got sentiment")
                return [sentiment.score, sentiment.magnitude]
            except Exception:
                # Return None so the caller can tell the request failed
                return None


        def Refresh():
            if len(tl.continuous_tweets) > 0: 
                tweets = tl.continuous_tweets[-1].get("full_tweet_message")
                # TODO: plug in a custom sentiment model here.
                sentiment = getSentiment(tweets)
                if sentiment is not None:
                    Senti_str.set(sentiment[0])
                    Confi_str.set(sentiment[1])
                print(tweets)
                tweet.set(tweets)

        Label(window, textvariable = tweet, font=("Helvetica", 10)).grid(row = 3, 
            column = 2)
        Label(window, textvariable = Senti_str).grid(row = 4, 
            column = 5)
        Label(window, textvariable = Confi_str).grid(row = 5,
            column = 5)

        def Retweet():
            tweet_id = tl.continuous_tweets[-1].get("tweet_id")
            tool.retweet(tweet_id)


        Label(window, text = "Live Tweet Viewer").grid(row = 0, 
                                          column = 2)

        Label(window, text = "Sentiment:").grid(row = 4, 
                                    column = 2)
        Label(window, text = "Confidence:").grid(row = 5, 
                                    column = 2)


        Button(window, text = "Refresh", 
                                command = Refresh).grid( 
                                row = 8, column = 2)  
     
        Button(window, text = "Retweet", 
                                command = Retweet).grid( 
                                row = 8, column = 1)

        Button(window, text = "Respond", 
                                command = self.Respond).grid( 
                                row = 8, column = 3)
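The viewer above only updates when the user clicks Refresh. A minimal sketch of automatic polling with Tkinter's after() scheduler, assuming it is placed at the end of EnterKeyword next to the buttons above (the AutoRefresh name and the 2-second interval are illustrative):

        def AutoRefresh():
            Refresh()                        # reuse the existing refresh logic
            window.after(2000, AutoRefresh)  # reschedule in 2 seconds

        AutoRefresh()  # start polling once the widgets are built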
Example #2
 def run(self):
     self.translate = translate.Client()
     self.language = language.LanguageServiceClient()
     self.app.run(host="0.0.0.0", port=int(os.environ.get('PORT', "8080")))
def runNewsAnalysis(stock, api):
    url = 'https://www.tradingview.com/screener/'

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    import time
    from bs4 import BeautifulSoup

    # Starts Chrome in headless mode using selenium
    # WINDOW_SIZE = "1920,1080"
    # chrome_options = Options()
    # chrome_options.add_argument('--headless')
    # chrome_options.add_argument("--window-size=%s" % WINDOW_SIZE)
    # chrome_options.binary_location = 'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
    # service = Service('C:/Users/Sean/AppData/Local/Temp/Rar$EXa34860.36409/chromedriver.exe')
    # service.start()
    # driver = webdriver.Chrome(executable_path='C:/Users/Sean/AppData/Local/Temp/Rar$EXa34860.36409/chromedriver.exe',
    #                           chrome_options=chrome_options)
    # driver.get(url)
    # html = driver.find_element_by_css_selector('table')
    #
    # # Uses BeautifulSoup to parse table
    # soup = BeautifulSoup(driver.page_source, 'lxml')
    #
    # job_elems = soup.find_all('tbody', attrs={"class": "tv-data-table__tbody"})
    #
    # rows = job_elems[1].findAll('td')
    #
    # for row in rows:
    #     row = row.find('a')
    #     # print(row.findAll("tv-screener-table__signal tv-screener-table__signal--strong-buy"))
    #     # ans = row.findAll('a')
    #     if row is not None:
    #         print(row)

    # print(job_elems[1].findAll('td'))

    # driver.quit()

    # Instantiates a client
    # [START language_python_migration_client]
    client = language.LanguageServiceClient()
    # [END language_python_migration_client]

    # NewsAPI API call
    url = ('https://newsapi.org/v2/everything?'
           'apiKey=d42e88f1fb624084891e89df549c06ff&'
           'q=' + stock + '&'
           'sources=reuters, the-wall-street-journal, cnbc&'
           'language=en&'
           'sortBy=publishedAt&'
           'pageSize=100')
    response = requests.get(url).json()['articles']

    # Polygon News API call
    news = api.polygon.news(stock)

    file = open('news.txt', 'w')

    sentiment = 0
    for line in response:
        words = str(line['content'])
        file.write(words)

        document = {"content": words, "type": enums.Document.Type.PLAIN_TEXT}

        # Detects the sentiment of the text
        sentiment += client.analyze_sentiment(
            document=document,
            encoding_type=enums.EncodingType.UTF8).document_sentiment.magnitude

    for source in news:
        words = source.summary
        document = {"content": words, "type": enums.Document.Type.PLAIN_TEXT}

        # Detects the sentiment of the text
        sentiment += client.analyze_sentiment(
            document=document,
            encoding_type=enums.EncodingType.UTF8).document_sentiment.magnitude

    return sentiment
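A hypothetical call of the function above; the api argument is assumed to be an Alpaca-style REST client that exposes polygon.news(), as the code implies:

# Hypothetical usage; requires NewsAPI access and a client that exposes api.polygon.news()
total_magnitude = runNewsAnalysis('AAPL', api)
print('Aggregate news sentiment magnitude for AAPL:', total_magnitude)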
Example #4
def listen_print_loop(responses):
    """Iterates through server responses and prints them.
    The responses passed is a generator that will block until a response
    is provided by the server.
    Each response may contain multiple results, and each result may contain
    multiple alternatives; for details, see https://goo.gl/tjCPAU.  Here we
    print only the transcription for the top alternative of the top result.
    In this case, responses are provided for interim results as well. If the
    response is an interim one, print a line feed at the end of it, to allow
    the next result to overwrite it, until the response is a final one. For the
    final one, print a newline to preserve the finalized transcription.
    """
    num_chars_printed = 0
    foundPerson = False  # ensure the final return value is defined even if no person is found
    for response in responses:
        if not response.results:
            continue

        # The `results` list is consecutive. For streaming, we only care about
        # the first result being considered, since once it's `is_final`, it
        # moves on to considering the next utterance.
        result = response.results[0]
        if not result.alternatives:
            continue

        # Display the transcription of the top alternative.
        transcript = result.alternatives[0].transcript

        # Display interim results, but with a carriage return at the end of the
        # line, so subsequent lines will overwrite them.
        #
        # If the previous result was longer than this one, we need to print
        # some extra spaces to overwrite the previous result
        overwrite_chars = ' ' * (num_chars_printed - len(transcript))

        if not result.is_final:

            sys.stdout.write(transcript + overwrite_chars + '\r')
            sys.stdout.flush()

            num_chars_printed = len(transcript)

        else:
            print(transcript + overwrite_chars)

            ### Sentiment / entity analysis on the final transcript ###
            # Instantiates a client
            clientNLP = language.LanguageServiceClient()

            # The text to analyze
            text = transcript + overwrite_chars
            document = types_language.Document(
                content=text, type=enums_language.Document.Type.PLAIN_TEXT)

            # Detects the entities mentioned in the text
            entities = clientNLP.analyze_entities(document).entities

            for entity in entities:
                # entity_type = enums.Entity.Type(entity.type)
                print('=' * 20)
                print("entity type: " + str(entity.type))
                print(u'{:<16}: {}'.format('name', entity.name))
                print(u'{:<16}: {}'.format('salience', entity.salience))

                if entity.type == 1:  # enums.Entity.Type.PERSON
                    if not Person.isNameInDB(entity.name):
                        new_person = Person.Person(entity.name)
                        foundPerson = True
                        print("New person detected: " + entity.name)
                        return True

            #         # Save image
            #         # Trigger model to train again

            # Exit recognition if any of the transcribed phrases could be
            # one of our keywords.
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                print('Exiting..')
                break

            num_chars_printed = 0

    return foundPerson
Example #5
 def get_language_service(self):
     return language.LanguageServiceClient(credentials=self.credentials)
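A minimal sketch of how self.credentials might be constructed, assuming a service-account JSON key file as in Example #15 below; the file name is a placeholder:

from google.oauth2 import service_account
from google.cloud import language

# Placeholder path to a service-account key file
credentials = service_account.Credentials.from_service_account_file("service-account.json")
client = language.LanguageServiceClient(credentials=credentials)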
Example #6
def helloWorld(ticker):
    l = []
    b = tweepy.Cursor(
        api.search,
        q=ticker,
        count=1,
        result_type="mixed",
        include_entities=True,
        lang="en",
        until="2017-09-29",
    ).items()
    for t in b:
        l.append(t)

    for d in [
            "2017-09-30", "2017-10-01", "2017-10-02", "2017-10-03",
            "2017-10-04", "2017-10-05", "2017-10-06", "2017-10-07"
    ]:
        b = tweepy.Cursor(api.search,
                          q=ticker,
                          count=2,
                          result_type="mixed",
                          include_entities=True,
                          lang="en",
                          until=d,
                          since_id=str(l[-1].id)).items()
        for t in b:
            l.append(t)

    dates = []
    for t in l:
        strng = ""
        for i in [t.created_at.year, t.created_at.month, t.created_at.day]:
            if i < 10:
                strng += "0"
            strng += str(i)
        dates.append(strng)

    import string
    printable = set(string.printable)

    # Instantiates a client
    client = language.LanguageServiceClient()

    # The text to analyze
    s = []
    m = []
    for t in l:
        text = _removeNonAscii(t.text)
        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)

        # Detects the sentiment of the text
        sentiment = client.analyze_sentiment(
            document=document).document_sentiment
        s.append(sentiment.score)
        m.append(sentiment.magnitude)

    payload = {
        'fullUrl':
        "https://www.blackrock.com/tools/hackathon/performance?identifiers=" +
        ticker,
        'parameters': {
            'identifiers': ticker
        },
        'url':
        "/tools/hackathon/performance"
    }

    r = requests.get(
        "https://www.blackrock.com/tools/hackathon/performance?identifiers=" +
        ticker,
        params=payload)
    a = r.json()
    c = a  # r.json() already returns parsed JSON; dumping it to a string would break the indexing below
    br = [c['resultMap']['RETURNS'][0]['returnsMap'][str(20170929)]['oneDay']]
    br.append(
        c['resultMap']['RETURNS'][0]['returnsMap'][str(20170928)]['oneDay'])
    br.append(
        c['resultMap']['RETURNS'][0]['returnsMap'][str(20170930)]['oneDay'])
    for d in range(20171001, 20171006):
        br.append(c['resultMap']['RETURNS'][0]['returnsMap'][str(d)]['oneDay'])

    df = pd.DataFrame({'day': dates[:901], 'magnitude': m, 'sentiment': s})
    df2 = df.groupby(['day']).mean()
    x = df2['magnitude'].tolist()
    x1 = df2['sentiment'].tolist()
    import numpy as np
    q = np.array([x[:-1], x1[:-1]])
    z = np.array(br)
    x = x[:-1]
    x1 = x1[:-1]
    regr = linear_model.LinearRegression()
    regr.fit(q.transpose(), z)
    nw = np.array([[x[-1]], [x1[-1]]])
    p = regr.predict(nw.transpose())
    x = z + [p]
    return [x, x1]
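A hypothetical invocation of the function above; it assumes tweepy (api) and Cloud Natural Language credentials are already configured:

# Returns a pair: observed returns plus the regression prediction, and the daily sentiment scores
returns_with_prediction, daily_sentiment = helloWorld('AAPL')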
Example #7
def get_all_tweets(screen_name):

    #Twitter only allows access to a user's most recent 3240 tweets with this method

    #authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)
    client = language.LanguageServiceClient()
    #initialize a list to hold all the tweepy Tweets
    alltweets = []

    #make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=10)

    #save most recent tweets
    alltweets.extend(new_tweets)

    #save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    #keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:

        #all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name,
                                       count=10,
                                       max_id=oldest)

        #save most recent tweets
        alltweets.extend(new_tweets)

        #update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1
        if (len(alltweets) > 15):
            break
        print("...%s tweets downloaded so far" % (len(alltweets)))

    #write tweet objects to JSON
    file = open('tweet.json', 'w')
    print("Writing tweet objects to JSON please wait...")
    for status in alltweets:
        json.dump(status._json, file, sort_keys=True, indent=4)

    #close the file
    print("Done")
    file.close()

    for t in alltweets:

        text = t.text
        print("%s" % t.text)
        document = types.Document(content=text,
                                  type=enums.Document.Type.PLAIN_TEXT)
        print("\nAnalysis:\n")
        # Detects the sentiment of the text
        sentiment = client.analyze_sentiment(
            document=document).document_sentiment
        keywords = client.analyze_entities(document=document).entities
        print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))
        print("keywords:\n")
        for e in keywords:
            print(e.name, e.salience)
        print(
            "===============================================================")
import praw
from .api_keys import reddit_client_id, reddit_client_secret, reddit_user_agent

# Import NotFound to validate subreddit form
from prawcore import NotFound

# Import time modules for UTC to local timezone conversion
import time

# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

# Create a Google Cloud client instance
google_client = language.LanguageServiceClient()

# PRAW Manager class
class RedditScrapeManager:
    def __init__(self, subreddit):
        # Receive arguments from object
        self.subreddit = subreddit
        # Initialize list to store dictionaries of submission info
        self.master_submission_data_list = []
        # Configure read-only reddit instance to query API for data
        self.reddit = praw.Reddit(client_id=reddit_client_id,
                                  client_secret=reddit_client_secret,
                                  user_agent=reddit_user_agent)

        # Create subreddit instance to pass on to class methods
        self.subreddit_instance = self.reddit.subreddit(self.subreddit)
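The class is cut off here; a hedged sketch of how the module-level google_client might be applied to submissions (the method name and stored fields are illustrative, not from the original class):

    def analyze_hot_submissions(self, limit=10):
        # Illustrative: score the titles of hot submissions with the Natural Language API
        for submission in self.subreddit_instance.hot(limit=limit):
            document = types.Document(content=submission.title,
                                      type=enums.Document.Type.PLAIN_TEXT)
            sentiment = google_client.analyze_sentiment(document=document).document_sentiment
            self.master_submission_data_list.append({
                'title': submission.title,
                'sentiment_score': sentiment.score,
                'sentiment_magnitude': sentiment.magnitude,
            })
        return self.master_submission_data_list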
Example #9
File: scrp.py  Project: CodeChefVIT/TARDIS
r = requests.get(url)
x = r.json()

y = x['results']  ## Storing the results child in the json

for i in range(len(y)):
    #print(y[i]['name'],y[i]['place_id'])
    url2 = "https://maps.googleapis.com/maps/api/place/details/json?key=AIzaSyA1NVeXFdIsleXRor_x4bKKsLonWkd0Jm4&language=en&placeid=" + y[
        i]['place_id']
    ## Using the place API to get details of the reviews of the hospital in the given city
    r = requests.get(url2)

    x = r.json()
    z = x['result']['reviews']  ## Extracting only the reviews from the details

    client = language.LanguageServiceClient()  ## Google Cloud Natural Language client initialization
    r1 = 0  ## Variable to store the total sentiment value for the hospital
    for p in z:
        ## Clean the text to remove non english characters
        content = cleaner(p['text'])
        #print(content+"  ")
        document = types.Document(content=content,
                                  type=enums.Document.Type.PLAIN_TEXT)
        try:
            annotations = client.analyze_sentiment(
                document=document)  ## Use the Google API to perform sentiment analysis
            val = print_result(annotations)
            r1 = r1 + val  ## Append to the total sentiment value of the hospital
            l = [[y[i]['name'], city, content, val]]  ## Create a list
            df = df.append(l)  ## Append details to the dataframe
Example #10
File: test.py  Project: yuvalbar23/anyway
def get_location_of_text(input_text, maps_key):
    no_random_road_groups = []
    no_hospital_loc_groups = []
    loc_groups = []
    biggest_group_index = -1
    reference_grouping = False

    # Instantiates the clients
    client = language.LanguageServiceClient()
    translate_client = translate.Client()

    # Translate
    result = translate_client.translate(input_text, target_language='en', source_language='iw')
    translated_text = result['translatedText']
    translated_text = html.unescape(translated_text)
    # Pre-processing - from what I saw only the first line has the location
    translated_text = list(filter(None, translated_text.split('.')))[0]
    # Analyze (Named Entity Recognition)
    document = types.Document(content=translated_text, type=enums.Document.Type.PLAIN_TEXT)
    response = client.analyze_entities(document=document)
    # Getting the location entities and their indices in the text and adding them to a list
    loc_entities = []
    loc_entities_indices = []
    translated_text_word_split = list(filter(None, translated_text.split(' ')))
    loc_entities_word_indices = []
    for entity in response.entities:
        if entity.type == enums.Entity.Type.LOCATION:
            print('=' * 20)
            print('name: {0}'.format(entity.name))
            if ' ' in entity.name:
                for item in list(filter(None, entity.name.split(' '))):
                    loc_entities.append(item)
                    loc_entities_indices.append(translated_text.index(entity.name) + entity.name.index(item))
            else:
                loc_entities.append(entity.name)
                loc_entities_indices.append(translated_text.index(entity.name))
            # In case there is a reference to a previous location
            if entity.name.lower() in ('city', 'town', 'village', 'junction',
                                       'interchange', 'intersect', 'street'):
                reference_grouping = True
    # Original order
    print(translated_text)
    print(loc_entities)
    print(loc_entities_indices)
    # Sort entities by appearing order in the string
    loc_entities = [x for _, x in sorted(zip(loc_entities_indices, loc_entities))]
    loc_entities_new = []
    for item in loc_entities:
        loc_entities_word_indices.append(
            [idx for idx, s in enumerate(translated_text_word_split) if item in s][loc_entities_new.count(item)])
        loc_entities_new.append(item)
    loc_entities = loc_entities_new
    print('\n \n \n')
    print(loc_entities)
    print(loc_entities_word_indices)
    print('reference grouping ' + str(reference_grouping))
    # Copy the string containing the entities for relational data between them
    if len(loc_entities) > 1:
        # Location grouping - takes the largest group of words indicating location based on distance between groups
        diff = [loc_entities_word_indices[i + 1] - loc_entities_word_indices[i] for i in
                range(len(loc_entities_word_indices) - 1)]
        print(diff)
        if max(diff) > 5:  # distance is greater than 5 words
            avg = sum(diff) / len(diff)
            loc_groups = [[loc_entities_word_indices[0]]]
            for x in loc_entities_word_indices[1:]:
                if x - loc_groups[-1][-1] < avg:
                    loc_groups[-1].append(x)
                else:
                    loc_groups.append([x])
            print(loc_groups)
            no_random_road_groups = [group for group in loc_groups
                                     if
                                     not (len(group) == 1 and 'road' == translated_text_word_split[group[0]].lower())]
            no_hospital_loc_groups = [group for group in no_random_road_groups
                                      if not
                                      any('hospital' in translated_text_word_split[item].lower() for item in group)]
            bounds_loc_groups = [i[-1] - i[0] for ind, i in enumerate(no_hospital_loc_groups)]
            biggest_group_index = bounds_loc_groups.index(max(bounds_loc_groups))
            loc_entities = [translated_text_word_split[item] for item in no_hospital_loc_groups[biggest_group_index]]
            print(loc_entities)
        # Getting the full string from the text indicating the location and not just entities
        translated_location = translated_text[
                              translated_text.index(loc_entities[0]):translated_text.index(loc_entities[-1]) + len(
                                  loc_entities[-1])]
        print(translated_location)
        if translated_text[translated_text.index(loc_entities[0]) - 4:translated_text.index(loc_entities[0])].lower() \
                == 'the ':
            translated_location = translated_text[
                                  translated_text.index(loc_entities[0]) - 4:translated_text.index(
                                      loc_entities[-1]) + len(
                                      loc_entities[-1])]
        print(translated_location)
        if translated_location.lower().startswith('street') or translated_location.lower().startswith('interchange') \
                or translated_location.lower().startswith('village') or translated_location.lower().startswith('town') \
                or translated_location.lower().startswith('city') or translated_location.lower().startswith(
            'intersection') \
                or translated_location.lower().startswith('junction'):
            translated_location = translated_text_word_split[translated_text_word_split.index(loc_entities[0]) - 1] \
                                  + ' ' + translated_location
            reference_grouping = False
        print(translated_location)
        print('\n\n\n')
        # Trying to solve the reference in case there is another group
        if reference_grouping and len(no_hospital_loc_groups) >= 2:
            print('xd0')
            previous = sys.maxsize
            if biggest_group_index > 0:
                previous = no_hospital_loc_groups[biggest_group_index][0] - \
                           no_hospital_loc_groups[biggest_group_index - 1][-1]
            if previous != sys.maxsize:
                text_to_replace = translated_text_word_split[
                    no_hospital_loc_groups[biggest_group_index - 1][-1]]
                print('text to replace' + text_to_replace)
                if len(no_hospital_loc_groups[biggest_group_index - 1]) > 1:
                    last = no_hospital_loc_groups[biggest_group_index - 1][-1]
                    for index, val in enumerate(loc_groups[biggest_group_index - 1][::-1][1:]):
                        if last - val == 1:
                            text_to_replace = translated_text_word_split[
                                                  no_hospital_loc_groups[biggest_group_index - 1][
                                                      -2 - index]] + ' ' + text_to_replace
                            last = val
                        else:
                            break
                translated_location = translated_location.replace(
                    'the junction', text_to_replace).replace(
                    'the intersect', text_to_replace).replace(
                    'the interchange', text_to_replace).replace(
                    'the street', text_to_replace).replace(
                    'the city', text_to_replace).replace(
                    'the town', text_to_replace).replace(
                    'the village', text_to_replace)
        elif reference_grouping and len(no_random_road_groups) >= 2:
            print('check 0')
            previous = sys.maxsize
            bounds_loc_groups = [i[-1] - i[0] for ind, i in enumerate(no_random_road_groups)]
            biggest_group_index = bounds_loc_groups.index(max(bounds_loc_groups))
            if biggest_group_index > 0:
                previous = no_random_road_groups[biggest_group_index][0] - \
                           no_random_road_groups[biggest_group_index - 1][-1]
            if previous != sys.maxsize and 'hospital' not in \
                    translated_text_word_split[no_random_road_groups[biggest_group_index - 1][-1]].lower():
                print('check3')
                text_to_replace = translated_text_word_split[
                    no_random_road_groups[biggest_group_index - 1][-1]]
                print('text to replace' + text_to_replace)
                if len(no_random_road_groups[biggest_group_index - 1]) > 1:
                    last = no_random_road_groups[biggest_group_index - 1][-1]
                    for index, val in enumerate(loc_groups[biggest_group_index - 1][::-1][1:]):
                        if last - val == 1:
                            text_to_replace = translated_text_word_split[
                                                  no_random_road_groups[biggest_group_index - 1][
                                                      -2 - index]] + ' ' + text_to_replace
                            last = val
                        else:
                            break
                translated_location = translated_location.replace(
                    'the junction', text_to_replace).replace(
                    'the intersect', text_to_replace).replace(
                    'the interchange', text_to_replace).replace(
                    'the street', text_to_replace).replace(
                    'the city', text_to_replace).replace(
                    'the town', text_to_replace).replace(
                    'the village', text_to_replace)

    elif len(loc_entities) == 1:
        translated_location = loc_entities[0]
        if translated_text[translated_text.index(loc_entities[0]) - 4:translated_text.index(loc_entities[0])].lower() \
                == 'the ':
            translated_location = translated_text[
                                  translated_text.index(loc_entities[0]):translated_text.index(loc_entities[0]) + len(
                                      loc_entities[0])]
        if translated_location.lower().startswith('street') or translated_location.lower().startswith('interchange') \
                or translated_location.lower().startswith('village') or translated_location.lower().startswith('town') \
                or translated_location.lower().startswith('city') or translated_location.lower().startswith(
            'intersection') \
                or translated_location.lower().startswith('junction'):
            translated_location = translated_text_word_split[translated_text_word_split.index(loc_entities[0]) - 1] \
                                  + ' ' + translated_location
    else:
        translated_location = ''
    translated_location = translated_location.strip()
    if translated_location and translated_location[-1] == ',':
        translated_location = translated_location[:-1]
    location = html.unescape(translated_location)
    gmaps = googlemaps.Client(key=maps_key)
    print('location: ' + location)
    geocode_result = gmaps.geocode(location)
    if geocode_result is None or geocode_result == []:
        return None
    country = ''
    print(geocode_result)
    for address in geocode_result[0]['address_components']:
        if any('country' in s for s in address['types']):
            country = address['short_name']
            break
    if country == 'IL':
        print(geocode_result[0]['geometry']['location'])
        return geocode_result[0]['geometry']['location']
    else:
        return None
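A minimal, hypothetical invocation of get_location_of_text; the report text and Maps key are placeholders, and the call requires Translation, Natural Language and Geocoding credentials:

# Placeholder input; the function expects a Hebrew ('iw') accident report
sample_report = '<Hebrew accident report text>'
get_location_of_text(sample_report, maps_key='YOUR_MAPS_API_KEY')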
Example #11
 def __init__(self, logs_to_cloud):
     self.logs = Logs(name="analysis", to_cloud=logs_to_cloud)
     self.language_client = language.LanguageServiceClient()
     self.twitter = Twitter(logs_to_cloud=logs_to_cloud)
Example #12
 def __init__(self):
     self.client = language.LanguageServiceClient()
     self.doc_type = 'HTML'
    def __init__(self, credentials_file=None):
        if credentials_file:
            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_file

        # Initialize google cloud language service client
        self.client = language.LanguageServiceClient()
Example #14
 def __init__(self):
     self.client = language.LanguageServiceClient()
Example #15
from google.cloud import speech_v1
from google.cloud import language
#from google.cloud.speech_v1 import enums
from google.oauth2 import service_account
from google.cloud.speech import types
from google.cloud.language import enums
from google.cloud.language import types as langtypes
from recordtest import Recorder
from recordtest import RecordingFile
import six

credentials = service_account.Credentials.from_service_account_file("ventmo-e430325bdb43.json")
client = speech_v1.SpeechClient(credentials = credentials)
langclient = language.LanguageServiceClient(credentials = credentials)

#encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
#sample_rate_hertz = 16600
language_code = 'en-US'
config = {'language_code': language_code}
#uri = '../Documents/Sound recordings/test-voice.m4a'
#audio = {'uri': uri}
# The name of the audio file to transcribe
rec = Recorder()
text = input("enter s to start >> ")  # Python 3
if text == 's':
    with rec.open('test.wav', 'wb') as recfile:
        print("recording for 5 seconds")
        recfile.record(duration=5.0)
file_name = "test.wav"

# Loads the audio into memory
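The snippet stops before the audio is read; a hedged sketch of the likely continuation, based only on the imports and client objects above (it assumes the pre-2.0 google-cloud-speech recognize(config, audio) signature that these imports suggest):

import io

# Load the recorded audio into memory
with io.open(file_name, 'rb') as audio_file:
    content = audio_file.read()
audio = {'content': content}

# Transcribe, then run sentiment analysis on each transcript
response = client.recognize(config, audio)
for result in response.results:
    transcript = result.alternatives[0].transcript
    document = langtypes.Document(content=transcript,
                                  type=enums.Document.Type.PLAIN_TEXT)
    sentiment = langclient.analyze_sentiment(document=document).document_sentiment
    print(transcript, sentiment.score, sentiment.magnitude)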
Example #16
def create_game(users):
    """
	Create games from curated list, including comments
	"""
    f = 'gamedata.json'
    data = None
    with open(os.path.join(settings.BASE_DIR, 'scripts', f),
              'rb') as data_file:
        data = json.load(data_file)

    games = data['games']
    for d in games:
        name = d["name"].encode('ascii', 'ignore').decode('ascii')
        desc = d["description"].encode('ascii', 'ignore').decode('ascii')
        desc = desc[:1024] if len(desc) > 1024 else desc

        print("Creating game {}...".format(name))
        game, _ = Game.objects.get_or_create(
            title=name,
            description=desc,
            platform=d["platform"],
            img_url=d["img_url"],
            genre=d["genre"],
            rating=d["rating"],
            release_date=datetime.datetime.strptime(
                d["release_date"], "%b %d, %Y") if d["release_date"] else None,
            publisher=d["publisher"],
            developer=d["developer"],
            na_sales=d["na_sales"] if d["na_sales"] else None,
            eu_sales=d["eu_sales"] if d["eu_sales"] else None,
            jp_sales=d["jp_sales"] if d["jp_sales"] else None,
            ot_sales=d["rw_sales"] if d["rw_sales"] else None,
            gb_sales=d["gl_sales"] if d["gl_sales"] else None,
            critic_score=d["critic_score"] if d["critic_score"] else None,
            critic_count=d["critic_count"] if d["critic_count"] else None,
            user_score=d["user_score"] if d["user_score"] else None,
            user_count=d["user_count"] if d["user_count"] else None,
        )

        critic_reviews = d['critic_reviews']
        print("Creating {} critic reviews for game {}...".format(
            len(critic_reviews), name))

        client = None
        if os.environ.get("GAE_INSTANCE") or os.environ.get("ENABLE_CLOUD"):
            client = language.LanguageServiceClient()

        for c in critic_reviews[:10]:
            # Quick and dirty way to restrict length of comment to 1024 characters
            desc = c[:1024] if len(c) > 1024 else c
            desc = desc.encode('ascii', 'ignore')

            # Create comment first, and then apply NLP
            comment, created = Comment.objects.get_or_create(
                game=game,
                description=desc,
                is_critic=True,
                is_user=False,
            )

            if os.environ.get("GAE_INSTANCE") or os.environ.get(
                    "ENABLE_CLOUD"):
                try:
                    if created:
                        document = types.Document(
                            content=desc, type=enums.Document.Type.PLAIN_TEXT)
                        # Detects the sentiment of the text
                        sentiment = client.analyze_sentiment(
                            document=document).document_sentiment
                        print('Sentiment: {}, {}'.format(
                            sentiment.score, sentiment.magnitude))

                        comment.sentiment_score = format(
                            sentiment.score, '.3f')
                        comment.sentiment_magnitude = format(
                            sentiment.magnitude, '.3f')
                        comment.save()
                        time.sleep(0.4)
                except:
                    print("NLP error occurred...skipping")
                    pass
            else:
                comment.sentiment_score = 0
                comment.sentiment_magnitude = 0
                comment.save()

        user_reviews = d['user_reviews']
        print("Creating {} user reviews for game {}...".format(
            len(user_reviews), name))
        for u in user_reviews[:10]:
            # Quick and dirty way to restrict length of comment to 1024
            # characters
            desc = u[:1024] if len(u) > 1024 else u
            desc = desc.encode('ascii', 'ignore')

            # Create comment first, and then apply NLP
            comment, created = Comment.objects.get_or_create(
                game=game,
                description=desc,
                is_critic=False,
                is_user=True,
            )

            if os.environ.get("GAE_INSTANCE") or os.environ.get(
                    "ENABLE_CLOUD"):
                # Detects the sentiment of the text
                try:
                    if created:
                        document = types.Document(
                            content=desc, type=enums.Document.Type.PLAIN_TEXT)

                        sentiment = client.analyze_sentiment(
                            document=document).document_sentiment
                        print('Sentiment: {}, {}'.format(
                            sentiment.score, sentiment.magnitude))

                        comment.sentiment_score = format(
                            sentiment.score, '.3f')
                        comment.sentiment_magnitude = format(
                            sentiment.magnitude, '.3f')
                        comment.save()
                        time.sleep(0.4)
                except:
                    print("NLP error occurred...skipping")
                    pass
            else:
                comment.sentiment_score = 0
                comment.sentiment_magnitude = 0
                comment.save()

    # Pick a random assortment of games
    choices = ['WANT TO PLAY', 'HAVE PLAYED', 'NEVER', 'CURRENTLY PLAYING']

    for u in users:
        game_picks = random.sample(games, 5)
        friends = users
        friends.remove(u)
        friend_pick = random.choice(friends)
        choice = random.choice(choices)

        for i in game_picks:
            options = {'title': i['name'], 'platform': i['platform']}
            game_pick = Game.objects.get(**options)
            # Curate game list for this user
            GameList.objects.get_or_create(user=u, game=game_pick, type=choice)

        Friend.objects.get_or_create(user=u, friend=friend_pick)
Example #17
def sentiment(text):
    client = language.LanguageServiceClient()
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)
    results = client.analyze_sentiment(document).document_sentiment
    return results
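Example usage for the helper above (assumes the same imports and valid credentials); document_sentiment exposes a score in [-1, 1] and a non-negative magnitude:

result = sentiment("The service was quick and the staff were friendly.")
print(result.score, result.magnitude)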
Example #18
from googleapiclient.discovery import build
from oauth2client.file import Storage
from flask_wtf import FlaskForm
from wtforms import TextField
from requests import get
from bs4 import BeautifulSoup
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

app = flask.Flask(__name__)
app.config.from_object(__name__)
app.config['SECRET_KEY'] = '7d441f27d441f27567d441f2b6176a'
# Instantiates a client
client_language = language.LanguageServiceClient()


@app.route('/')
def index():
    credentials = get_credentials()
    if credentials == False:
        return flask.redirect(flask.url_for('oauth2callback'))
    elif credentials.access_token_expired:
        return flask.redirect(flask.url_for('oauth2callback'))
    else:
        print('now calling fetch')
        form = SlideForm()
        return flask.render_template("index.html", form=form)

Example #19
 def __init__(self):
     # Instantiate a client
     self.client = language.LanguageServiceClient()
Example #20
        magnitude = annotations.document_sentiment.magnitude
        language = annotations.language
        print(sentiment, magnitude, language, comment)
        value = (page[15], sentiment, magnitude, language, str(page[5]))
        values = f'{values}{str(value)},'  # this is probably implicit but based on testing I left str()
    # in the previous loop we go through all 100 hits and now we insert them

    if values != '':
        q = insert(bq_client, values)
    return rows.next_page_token


if __name__ == '__main__':
    # start out client objects
    bq_client = bigquery.Client(project='zhmichael1')
    nlp_client = language.LanguageServiceClient()
    # get first page here to have a token ready so we can do a loop easily
    token = get_page(bq_client, nlp_client, token=None, max_results=100)
    # So we need to handle the quota problem of NL API. It allows 600 rpm.
    # We are doing chunks of 100 so to be safe, I sleep for 13 seconds in between chunks.
    # My goal is to not hit a quota problem and let it run continuously at a steady pace
    # so I don't have to rely on the fallback Retry() above.
    #time.sleep(13)
    while True:
        if token:  # token returns none when nothing else left
            token = get_page(bq_client,
                             nlp_client,
                             token=token,
                             max_results=100)
            #time.sleep(13)
        else:
            break  # token is None when there are no more pages
Example #21
def lang(text):
    # [START language_quickstart]
    # Imports the Google Cloud client library
    # [START language_python_migration_imports]
    from google.cloud import language
    from google.cloud.language import enums
    from google.cloud.language import types
    # [END language_python_migration_imports]

    # Instantiates a client
    # [START language_python_migration_client]
    client = language.LanguageServiceClient()
    # [END language_python_migration_client]

    print()

    # Instantiates a plain text document.
    document = types.Document(content=text,
                              type=enums.Document.Type.PLAIN_TEXT)

    # Detects syntax in the document
    tokens = client.analyze_syntax(document).tokens

    # part-of-speech tags from enums.PartOfSpeech.Tag
    pos_tag = ('UNKNOWN', 'ADJ', 'ADP', 'ADV', 'CONJ', 'DET', 'NOUN', 'NUM',
               'PRON', 'PRT', 'PUNCT', 'VERB', 'X', 'AFFIX')

    d = {}

    for token in tokens:
        if pos_tag[token.part_of_speech.tag] == 'NOUN' or pos_tag[
                token.part_of_speech.tag] == 'NUM':
            print(u'{}: {}'.format(pos_tag[token.part_of_speech.tag],
                                   token.text.content))
            d['{}'.format(token.text.content)] = '{}'.format(
                pos_tag[token.part_of_speech.tag])
            print(d)

    if d != {}:
        print('d: ', d)

    yesrecur = 0

    if ('month' in d) or ('year' in d) or ('week' in d):
        yesrecur = 1
        recur = 'Yes'
        if 'month' in d:
            interval = 'month'
        if 'year' in d:
            interval = 'year'
        if 'week' in d:
            interval = 'week'

    inv_d = {v: k for k, v in d.items()}
    don_amt = inv_d['NUM']

    print('Donation amount: ', don_amt)
    if yesrecur == 1:
        print('Recurring?: ', recur)
        print('Interval: ', interval)
    else:
        print('This donation does not recur.')

    #detects the sentiment of the text
    sentiment = (client.analyze_sentiment(
        document=document).document_sentiment)

    if sentiment.score == 0.0:
        print(
            'The sentiment score is {:.1f}, therefore the sentiment is mixed.'.
            format(sentiment.score))
    elif sentiment.score >= 0.4:
        print(
            'The sentiment score is {:.1f}, therefore the sentiment is positive.'
            .format(sentiment.score))
    elif sentiment.score <= -0.3:
        print(
            'The sentiment score is {:.1f}, therefore the sentiment is negative.'
            .format(sentiment.score))
    elif sentiment.score > -0.3 and sentiment.score < 0.4:
        print(
            'The sentiment score is {:.1f}, therefore the sentiment is neutral.'
            .format(sentiment.score))
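A hypothetical invocation of lang(); the input should mention a numeric amount and, optionally, a recurrence interval so the dictionary lookups inside the function succeed:

# Hypothetical usage; requires Cloud Natural Language credentials
lang('I would like to donate 25 dollars every month')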
Example #22
def analyze_entities_api(text='', verbose=False):
    """
    ref: https://cloud.google.com/natural-language/docs/reference/rpc/google.cloud.language.v1#google.cloud.language.v1.AnalyzeEntitiesResponse
    name:
          The representative name for the entity.
    type:
          The entity type.
    metadata:
          Metadata associated with the entity.  Currently, Wikipedia
          URLs and Knowledge Graph MIDs are provided, if available. The
          associated keys are "wikipedia_url" and "mid", respectively.
    salience:
          The salience score associated with the entity in the [0, 1.0]
          range.  The salience score for an entity provides information
          about the importance or centrality of that entity to the
          entire document text. Scores closer to 0 are less salient,
          while scores closer to 1.0 are highly salient.
    mentions:
          The mentions of this entity in the input document. The API
          currently supports proper noun mentions.
    sentiment:
          For calls to AnalyzeEntitySentiment, or if
          AnnotateTextRequest.Features.extract_entity_sentiment is set
          to true, this field will contain the aggregate sentiment
          expressed for this entity in the provided document.

    :param document:
    :param verbose:
    :return: (entity.name, entity.type)
    """
    """Detects entities in the text."""
    text = text.lower()  #apparently entity search fails if there are capitals

    client = language.LanguageServiceClient()

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects entities in the document. You can also analyze HTML with:
    #   document.type == enums.Document.Type.HTML
    entities = client.analyze_entities(document).entities

    # entity types from enums.Entity.Type
    # TODO: specify only entities that we are interested in finding?
    entity_type = ('UNKNOWN', 'PERSON', 'LOCATION', 'ORGANIZATION',
                   'EVENT', 'WORK_OF_ART', 'CONSUMER_GOOD', 'OTHER')

    # is it a full name, or a common noun?
    entity_mention_type = ('TYPE_UNKNOWN', 'PROPER', 'COMMON')

    list_of_entities = []
    # todo: key entry by relevance or by entity name!?
    for entity in entities:
        list_of_entities.append({
            entity.name: {
                "entity salience": entity.salience,
                "entity type": entity_type[entity.type]
            }
        })
        #list_of_entities.append((entity.name, entity_type[entity.type], '{:.2f}'.format(entity.salience)))

    return list_of_entities
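Example usage for analyze_entities_api (hypothetical input; requires valid credentials). Note that the function lowercases its input before the entity call:

for item in analyze_entities_api('Barack Obama visited the Eiffel Tower in Paris.'):
    print(item)
# e.g. {'barack obama': {'entity salience': ..., 'entity type': 'PERSON'}}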
Example #23
def scanner():
    counter = 1
    total = 0
    results = []
    for i in url:
        data = requests.get(i)
        html = BeautifulSoup(data.text, 'html.parser')
        timeline = html.select('#timeline li.stream-item')
        for tweet in timeline:
            tweet_id = tweet['data-item-id']
            tweet_text = tweet.select('p.tweet-text')[0].get_text()
            all_tweets.append({"id": tweet_id, "text": tweet_text})
            if "market" in tweet_text or "economy" in tweet_text or "deal" in tweet_text:
                print(tweet_text)
            content = tweet_text
            client = language.LanguageServiceClient()
            document = types.Document(content=content,
                                      type=enums.Document.Type.PLAIN_TEXT)
            annotations = client.analyze_sentiment(document=document)
            magnitude = annotations.document_sentiment.magnitude
            print(
                "The sentiment magnitude of the tweet text is "
                + str(magnitude))
            if magnitude != 0:
                counter += 1
                total += magnitude
        results.append(total / counter)
    avgTot = 0
    for i in results:
        avgTot += i
    avgTot /= len(results)
    if avgTot < .30:
        status = "News analysis points towards a market fall"
        banner = "red"
        # "sell"
    elif avgTot < .55:
        status = "News analysis is neutral at this time"
        banner = "yellow"
        # hold
    else:
        status = "News analysis predicts the market going up"
        banner = "green"
    avgTot = avgTot * 100
    timefound = now.strftime("%Y-%m-%d %H:%M")
    dashboard = """/*!

=========================================================
* Paper Dashboard React - v1.1.0
=========================================================

* Product Page: https://www.creative-tim.com/product/paper-dashboard-react
* Copyright 2019 Creative Tim (https://www.creative-tim.com)

* Licensed under MIT (https://github.com/creativetimofficial/paper-dashboard-react/blob/master/LICENSE.md)

* Coded by Creative Tim

=========================================================

* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

*/
import React from "react";
// react plugin used to create charts
import { Line, Pie } from "react-chartjs-2";
// reactstrap components
import {
  Card,
  CardHeader,
  CardBody,
  CardFooter,
  CardTitle,
  Row,
  Col
} from "reactstrap";
// core components
import {
  dashboard24HoursPerformanceChart,
  dashboardEmailStatisticsChart,
  dashboardNASDAQChart
} from "variables/charts.jsx";



class Dashboard extends React.Component {
  render() {
    return (
      <>
        <div className="content">
          <Row>
            <Col md="12">
              <Card>
                <CardHeader>
                  <CardTitle tag="h5">""" + status + """</CardTitle>
                  <p className="card-category">The current calculated sentiment analysis value is: </p>""" + str(
        avgTot) + """
                </CardHeader>
                
                <CardFooter>
                  <hr />
                  <div className="stats">
                    <i className="fa fa-history" /> Updated """ + timefound + """
                  </div>
                </CardFooter>
              </Card>
            </Col>
          </Row>
          <Row>
          <Col md="6" sm="6">
              <Card className="card-stats">
                <CardBody>
                <iframe src="https://us.etrade.com/e/t/user/login"
                frameBorder="0"
                height="425"
                width = "100%"></iframe>
                </CardBody>
              </Card>
            </Col>
            <Col md="6" sm = "6">
              <Card className="card-chart">
                <CardHeader>
                  <CardTitle tag="h5">Sentiment Analysis Rating</CardTitle>
                  <p className="card-category">On a Scale from 0 to 100</p>
                </CardHeader>
                <CardBody>
                  <Line
                    data={dashboardNASDAQChart.data}
                    options={dashboardNASDAQChart.options}
                    width={400}
                    height={200}
                  />
                </CardBody>
                <CardFooter>
                  <div className="chart-legend">
                  </div>
                  <hr />
                  <div className="card-stats">
                    <i className="fa fa-check" /> Data information certified
                  </div>
                </CardFooter>
              </Card>
            </Col>
          </Row>
        </div>
      </>
    );
  }
}

export default Dashboard;"""

    charts = """/*!
=========================================================
* Paper Dashboard React - v1.1.0
=========================================================
* Product Page: https://www.creative-tim.com/product/paper-dashboard-react
* Copyright 2019 Creative Tim (https://www.creative-tim.com)
* Licensed under MIT (https://github.com/creativetimofficial/paper-dashboard-react/blob/master/LICENSE.md)
* Coded by Creative Tim
=========================================================
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*/
const dashboard24HoursPerformanceChart = {
  data: canvas => {
    return {
      labels: [
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct"
      ],
      datasets: [
        {
          borderColor: "#6bd098",
          backgroundColor: "#6bd098",
          pointRadius: 0,
          pointHoverRadius: 0,
          borderWidth: 3,
          data: [300, 310, 316, 322, 330, 326, 333, 345, 338, 354]
        },
        {
          borderColor: "#f17e5d",
          backgroundColor: "#f17e5d",
          pointRadius: 0,
          pointHoverRadius: 0,
          borderWidth: 3,
          data: [320, 340, 365, 360, 370, 385, 390, 384, 408, 420]
        },
        {
          borderColor: "#fcc468",
          backgroundColor: "#fcc468",
          pointRadius: 0,
          pointHoverRadius: 0,
          borderWidth: 3,
          data: [370, 394, 415, 409, 425, 445, 460, 450, 478, 484]
        }
      ]
    };
  },
  options: {
    legend: {
      display: false
    },

    tooltips: {
      enabled: false
    },

    scales: {
      yAxes: [
        {
          ticks: {
            fontColor: "#9f9f9f",
            beginAtZero: false,
            maxTicksLimit: 5
            //padding: 20
          },
          gridLines: {
            drawBorder: false,
            zeroLineColor: "#ccc",
            color: "rgba(255,255,255,0.05)"
          }
        }
      ],

      xAxes: [
        {
          barPercentage: 1.6,
          gridLines: {
            drawBorder: false,
            color: "rgba(255,255,255,0.1)",
            zeroLineColor: "transparent",
            display: false
          },
          ticks: {
            padding: 0.1,
            fontColor: "#9f9f9f"
          }
        }
      ]
    }
  }
};

const dashboardEmailStatisticsChart = {
  data: canvas => {
    return {
      labels: [1, 2, 3],
      datasets: [
        {
          label: "Emails",
          pointRadius: 0,
          pointHoverRadius: 0,
          backgroundColor: ["#e3e3e3", "#4acccd", "#fcc468", "#ef8157"],
          borderWidth: 0,
          data: [342, 480, 530, 120]
        }
      ]
    };
  },
  options: {
    legend: {
      display: false
    },

    pieceLabel: {
      render: "percentage",
      fontColor: ["white"],
      precision: 2
    },

    tooltips: {
      enabled: false
    },

    scales: {
      yAxes: [
        {
          ticks: {
            display: false
          },
          gridLines: {
            drawBorder: false,
            zeroLineColor: "transparent",
            color: "rgba(255,255,255,0.05)"
          }
        }
      ],

      xAxes: [
        {
          barPercentage: 1.6,
          gridLines: {
            drawBorder: false,
            color: "rgba(255,255,255,0.1)",
            zeroLineColor: "transparent"
          },
          ticks: {
            display: false
          }
        }
      ]
    }
  }
};

const dashboardNASDAQChart = {
  data: {
    labels: [
      "@realdonaldtrump",
      "@guardiannews",
      "@TheEconomist",
      "@washingtonpost",
      "@TheStalwart",
      "@VitalikButerin",
      "@cnnbrk",
      "@nytimes"
    ],
    datasets: [
      {
        data: [""" + str(results[0]) + """,""" + str(
        results[1]) + """,""" + str(results[2]) + """,""" + str(
            results[3]) + """,""" + str(results[4]) + """,""" + str(
                results[5]) + """,""" + str(results[6]) + """,""" + str(
                    results[7]) + """],
        fill: false,
        borderColor: "#51CACF",
        backgroundColor: "transparent",
        pointBorderColor: "#51CACF",
        pointRadius: 4,
        pointHoverRadius: 4,
        pointBorderWidth: 8
      },
    ]
  },
  options: {
    legend: {
      display: false,
      position: "top"
    }
  }
};

module.exports = {
  dashboard24HoursPerformanceChart,
  dashboardEmailStatisticsChart,
  dashboardNASDAQChart
};"""

    f = open(
        r"""C:\Users\CB\Desktop\hack\emtech-project\src\views\Dashboard.jsx""",
        "w+")
    f.write(dashboard)
    f.close()
    e = open(
        r"""C:\Users\CB\Desktop\hack\emtech-project\src\variables\charts.jsx""",
        "w+")
    e.write(charts)
    e.close()
    os.system('npm start')