Example #1
def app():
    file = open('data.json', 'w')
    json_string = '{"name":"'

    conversation_name = raw_input("Enter a name for this conversation: ")
    description = raw_input("Enter a description for this conversation: ")

    json_string += conversation_name
    json_string += '", '
    json_string += '"intents":['
    #Get intent and training data from user
    json_string += get_intent()

    #Use AlchemyAPI to get keywords so that users can get an idea for what kind of entities they should create
    get_analysis = raw_input(
        "Would you like to use a few sentences to help you come up with entities? (Y/N) "
    )
    if get_analysis == 'y' or get_analysis == 'Y':
        get_keywords()

    #Users create entities and values, and then get synonyms to those values
    json_string += '"entities":['
    json_string += get_entities()

    json_string += '"language:":"en","metadata":null,"description":"'
    json_string += description
    json_string += '",'
    json_string += '"dialog_nodes":[]}'

    # Note: json.dumps() on a string would double-encode it, so write it directly.
    file.write(json_string)
    file.close()
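
Hand-building JSON by string concatenation like this is fragile: brackets must balance by hand and user input is never escaped. A minimal sketch of the same flow built from a dict, assuming hypothetical variants of get_intent()/get_entities() that return Python lists rather than JSON fragments:

import json

def app_from_dict():
    workspace = {
        "name": raw_input("Enter a name for this conversation: "),
        "description": raw_input("Enter a description for this conversation: "),
        "language": "en",
        "metadata": None,
        "intents": get_intent_list(),    # hypothetical list-returning helper
        "entities": get_entity_list(),   # hypothetical list-returning helper
        "dialog_nodes": [],
    }
    with open('data.json', 'w') as f:
        json.dump(workspace, f)  # json.dump handles quoting and escaping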
Example #2
File: api.py Project: tkf/jedi
    def get_definition(self):
        """
        Returns the definitions of the path under the cursor. This is
        not a goto function! This follows complicated paths and returns the
        end, not the first definition.

        :return: list of Definition objects, which are basically scopes.
        :rtype: list
        """
        def resolve_import_paths(scopes):
            for s in scopes.copy():
                if isinstance(s, imports.ImportPath):
                    scopes.remove(s)
                    scopes.update(resolve_import_paths(set(s.follow())))
            return scopes

        goto_path = self.module.get_path_under_cursor()

        context = self.module.get_context()
        if next(context) in ('class', 'def'):
            scopes = set([self.module.parser.user_scope])
        elif not goto_path:
            op = self.module.get_operator_under_cursor()
            scopes = set([keywords.get_operator(op, self.pos)] if op else [])
        else:
            scopes = set(self._prepare_goto(goto_path))

        scopes = resolve_import_paths(scopes)

        # add keywords
        scopes |= keywords.get_keywords(string=goto_path, pos=self.pos)

        d = set([Definition(s) for s in scopes])
        return sorted(d, key=lambda x: (x.module_path, x.start_pos))
Example #3
File: server.py Project: ufal/ker
def process_file(file_path, tagger, idf_doc_count, idf_table, threshold,
                 maximum_words):
    """
    Takes the uploaded file, detects its type (plain text, ALTO XML, zip)
    and calls a parsing function accordingly. If everything succeeds it
    returns keywords and 200 code, returns an error otherwise.
    """
    file_info = magic.from_file(file_path)
    lines = []
    if re.match("^UTF-8 Unicode (with BOM) text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8-sig')
    elif re.match("^UTF-8 Unicode", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match("^ASCII text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match('^XML 1.0 document', file_info) and \
            (file_path.endswith('.alto') or file_path.endswith('.xml')):
        lines = lines_from_alto_file(file_path)
    elif re.match('^Zip archive data', file_info):
        lines = lines_from_zip_file(file_path)
    else:
        return {"eror": "Unsupported file type: {}".format(file_info)}, 400

    if not lines:
        return {"error": "Empty file"}, 400
    return keywords.get_keywords(lines, tagger, idf_doc_count, idf_table,
                                 threshold, maximum_words), 200
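
For context, a plausible sketch of the lines_from_txt_file helper this dispatcher relies on (hypothetical; ufal/ker's real implementation may differ):

import io

def lines_from_txt_file(file_path, encoding='utf-8'):
    # Decode using the detected encoding and drop trailing newlines.
    with io.open(file_path, encoding=encoding) as f:
        return [line.rstrip('\n') for line in f]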
Example #4
 def on_get(self, req, resp, start, end):
     """All tracked keywords in the database.
     Returns a sorted list with the keywords and their counts.
     Takes the "group" GET parameters for the keyword group.
     """
     global KEYWORDS, keywords_sync_time
     if (time() - keywords_sync_time) > 60 * 60:
         KEYWORDS = get_keywords(local=True)
         keywords_sync_time = time()
     query = {
         "num_keywords": {"$gt": 0},
         "datetime": {"$gte": start, "$lt": end}
     }
     group = req.get_param("group")
     if group:
         del query["num_keywords"]
         query["groups"] = group
     tw = tweets.find(query, projection={"keywords": True, "_id": False})
     counts = Counter()
     for t in tw:
         kws = t["keywords"]
         if group:
             keywords = []
             for kw in kws:
                 if kw in KEYWORDS:
                     if group in KEYWORDS[kw].groups:
                         keywords.append(kw)
             kws = keywords
         counts.update(kws)
     data = [{"keyword": kw, "count": c} for kw, c in counts.most_common()]
     resp.body = json.dumps(data)
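
For reference, the per-group filter in the loop above is equivalent to a single comprehension:

if group:
    kws = [kw for kw in kws
           if kw in KEYWORDS and group in KEYWORDS[kw].groups]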
Example #5
def answer_question(teacher_chat_id, parent_chat_id, answer):
    ques = None  # stays None when there is no pending question to answer
    with connection.cursor() as cursor:
        query = "SELECT * FROM parentsQuestions"
        cursor.execute(query)
        result = cursor.fetchone()
        if result is not None:
            ques = "'" + result['question'] + "'"
            lques = [result['question']]
            keywords = get_keywords(lques)
            query2 = f"INSERT INTO QA VALUES({result['chat_id']}, '{answer}', {ques}, '{keywords}')"
            cursor.execute(query2)
            query3 = f"DELETE FROM parentsQuestions WHERE question = {ques}"
            cursor.execute(query3)
            connection.commit()
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, parent_chat_id, "The teacher says:\n" + answer))
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, teacher_chat_id,
                    "Is this a general or a private question?"
                    "(general/ private)"))
        else:
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, teacher_chat_id,
                    "There are no questions to answer! "
                    "Thank you and have a nice day! :) "))
    return ques
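
Interpolating values straight into SQL (query2, query3) invites injection; a hedged sketch of the same statements with parameterized queries, assuming a PyMySQL-style DB-API cursor:

query2 = "INSERT INTO QA VALUES(%s, %s, %s, %s)"
cursor.execute(query2, (result['chat_id'], answer, result['question'], keywords))
query3 = "DELETE FROM parentsQuestions WHERE question = %s"
cursor.execute(query3, (result['question'],))

The driver quotes values itself, so the manual "'" + ... + "'" wrapping becomes unnecessary.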
Example #6
    def complete(self):
        """
        An auto completer for python files.

        :return: list of Completion objects.
        :rtype: list
        """
        path = self.module.get_path_until_cursor()
        path, dot, like = self._get_completion_parts(path)

        try:
            scopes = list(self._prepare_goto(path, True))
        except NotFoundError:
            scopes = []
            scope_generator = evaluate.get_names_for_scope(
                                            self.parser.user_scope, self.pos)
            completions = []
            for scope, name_list in scope_generator:
                for c in name_list:
                    completions.append((c, scope))
        else:
            completions = []
            debug.dbg('possible scopes', scopes)
            for s in scopes:
                # TODO is this really the right way? just ignore the funcs? \
                # do the magic functions first? and then recheck here?
                if not isinstance(s, evaluate.Function):
                    if isinstance(s, imports.ImportPath):
                        names = s.get_defined_names(on_import_stmt=True)
                    else:
                        names = s.get_defined_names()
                    for c in names:
                        completions.append((c, s))

        if not dot:  # named_params have no dots
            call_def = self.get_in_function_call()
            if call_def:
                if not call_def.module.is_builtin():
                    for p in call_def.params:
                        completions.append((p.get_name(), p))

            # Do the completion if there is no path before and no import stmt.
            if (not scopes or not isinstance(scopes[0], imports.ImportPath)) \
                        and not path:
                # add keywords
                bs = builtin.Builtin.scope
                completions += ((k, bs) for k in keywords.get_keywords(
                                                                    all=True))

        completions = [(c, s) for c, s in completions
                        if settings.case_insensitive_completion
                            and c.names[-1].lower().startswith(like.lower())
                            or c.names[-1].startswith(like)]

        needs_dot = not dot and path
        completions = set(completions)

        c = [Completion(c, needs_dot, len(like), s) for c, s in completions]

        return c
Example #7
def build_model005(df):
    from keywords import get_keywords

    x_cols = ['salary_min', 'salary_max', 'title', 'abstract']

    # print('!' * 80)
    # no_min = df['salary_min'].isnull()
    # no_min_max = df['salary_max'][no_min]
    # print('no_min_max')
    # print(no_min_max)
    # no_max = df['salary_max'].isnull()
    # no_max_min = df['salary_min'][no_max]
    # print('no_max_min')
    # print(no_max_min)

    has_minmax = df['salary_min'].notnull() & df['salary_max'].notnull()
    df = df[has_minmax]

    df_train, df_test = split_train_test(df)

    X, y = getXy(df_train, x_cols)
    X_test, _ = getXy(df_test, x_cols)

    # X.dropna(how='all', inplace=True)

    keywords = get_keywords(50)

    X = add_keywords(X, 'title', keywords['title'])
    X = add_keywords(X, 'abstract', keywords['abstract'])
    X_test = add_keywords(X_test, 'title', keywords['title'])
    X_test = add_keywords(X_test, 'abstract', keywords['abstract'])

    return X, y, X_test
Example #8
File: server.py Project: ufal/ker
def process_file(file_path, tagger, idf_doc_count, idf_table, threshold, maximum_words):
    """
    Takes the uploaded file, detects its type (plain text, ALTO XML, zip)
    and calls a parsing function accordingly. If everything succeeds it
    returns keywords and 200 code, returns an error otherwise.
    """
    file_info = magic.from_file(file_path)
    lines = []
    if re.match("^UTF-8 Unicode (with BOM) text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8-sig')
    elif re.match("^UTF-8 Unicode", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match("^ASCII text", file_info):
        lines = lines_from_txt_file(file_path, encoding='utf-8')
    elif re.match('^XML 1.0 document', file_info) and \
            (file_path.endswith('.alto') or file_path.endswith('.xml')):
        lines = lines_from_alto_file(file_path)
    elif re.match('^Zip archive data', file_info):
        lines = lines_from_zip_file(file_path)
    else:
        return {"eror": "Unsupported file type: {}".format(file_info)}, 400

    if not lines:
        return {"error": "Empty file"}, 400
    return keywords.get_keywords(lines, tagger, idf_doc_count, idf_table, threshold, maximum_words), 200
Example #9
    def goto_definitions(self):
        """
        Return the definitions of the path under the cursor. This is not a goto function!
        This follows complicated paths and returns the end, not the first
        definition. The big difference between :meth:`goto_assignments` and
        :meth:`goto_definitions` is that :meth:`goto_assignments` doesn't
        follow imports and statements. Multiple objects may be returned,
        because Python itself is a dynamic language, which means depending on
        an option you can have two different versions of a function.

        :rtype: list of :class:`api_classes.Definition`
        """

        def resolve_import_paths(scopes):
            for s in scopes.copy():
                if isinstance(s, imports.ImportPath):
                    scopes.remove(s)
                    scopes.update(resolve_import_paths(set(s.follow())))
            return scopes

        goto_path = self._module.get_path_under_cursor()

        context = self._module.get_context()
        scopes = set()
        lower_priority_operators = ("()", "(", ",")
        """Operators that could hide callee."""
        if next(context) in ("class", "def"):
            scopes = set([self._module.parser.user_scope])
        elif not goto_path:
            op = self._module.get_operator_under_cursor()
            if op and op not in lower_priority_operators:
                scopes = set([keywords.get_operator(op, self.pos)])

        # Fetch definition of callee
        if not goto_path:
            (call, _) = self._func_call_and_param_index()
            if call is not None:
                while call.next is not None:
                    call = call.next
                # reset cursor position:
                (row, col) = call.name.end_pos
                self.pos = (row, max(col - 1, 0))
                self._module = modules.ModuleWithCursor(self._source_path, source=self.source, position=self.pos)
                # then try to find the path again
                goto_path = self._module.get_path_under_cursor()

        if not scopes:
            if goto_path:
                scopes = set(self._prepare_goto(goto_path))
            elif op in lower_priority_operators:
                scopes = set([keywords.get_operator(op, self.pos)])

        scopes = resolve_import_paths(scopes)

        # add keywords
        scopes |= keywords.get_keywords(string=goto_path, pos=self.pos)

        d = set([api_classes.Definition(s) for s in scopes if not isinstance(s, imports.ImportPath._GlobalNamespace)])
        return self._sorted_defs(d)
Example #10
def extract_keywords():
    document = request.get_json()['text']

    response = Response(
        response=json.dumps(list(get_keywords(document))),
        status=200,
        mimetype="application/json",
    )

    return response
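
A minimal client-side sketch for exercising this endpoint with requests (URL and route are assumptions; substitute the real mount point):

import requests

resp = requests.post("http://localhost:5000/extract_keywords",  # hypothetical route
                     json={"text": "a document to extract keywords from"})
print(resp.json())  # JSON list of keywords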
Example #11
def main(tweets):
    tweet_clusters = scipy_clustering(tweets)
    print len(tweet_clusters), len(tweets)
    for number, cluster in enumerate(tweet_clusters[:10]):
        print "These are the keywords in cluster:", number + 1, "Length:", len(
            cluster["list"]), "Score:", cluster["score"]
        print kw.reduce_text_list(
            kw.get_keywords(kw.reduce_text_list(cluster["list"], as_list=True),
                            5))
        print ""
def get_tweet(dict_tweet):
    tweet = ""
    tweet += "Tweet por @%s\n" % (dict_tweet["user"]["name"])
    tweet += "%s\n" % (dict_tweet["text"])
    try:
        tweet += "%s" % (dict_tweet["entities"]["urls"][0]["url"])
    except (KeyError, IndexError):  # tweet has no URL entity
        pass

    keys = keywords.get_keywords(dict_tweet["text"])
    # keys = " ".join(keys)
    # url_image = list(get_google_image.get_scrapped_image(keys))[0]
    url_image = ""
    return tweet, url_image
Example #13
def answer_add_question(answer, chat_id):
    global the_question_to_answer
    with connection.cursor() as cursor:
        query = f"SELECT * FROM QA WHERE question='{the_question_to_answer}' and chat_id={chat_id}"
        cursor.execute(query)
        result = cursor.fetchone()
        if result is None:
            lquest = [the_question_to_answer]
            keywords = get_keywords(lquest)
            query = f"INSERT INTO QA VALUES({chat_id}, '{answer}', '{the_question_to_answer}' , '{keywords}')"
            cursor.execute(query)
            connection.commit()
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, chat_id, "question added successfully!"))
    the_question_to_answer = ''
Example #14
def ask_question(question, chat_id, class_):
    first_question = question
    similar_questions = []
    with connection.cursor() as cursor:
        question = "'" + question + "'"
        lquestion = [question]
        keywords = get_keywords(lquestion)
        query = f"SELECT * FROM QA"
        cursor.execute(query)
        res = cursor.fetchall()
        split_keywords = keywords.split()
        for result in res:
            db_keywords = result['keywords'].split()
            for key in split_keywords:
                if key in db_keywords:
                    similar_questions.append(result['question'])
                    break
        if len(similar_questions) != 0:
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, chat_id, "It seems we have similar questions:\n"
                    "Write  %<number>"
                    "  to choose the question you want\n"))
            for i, question in enumerate(similar_questions):
                requests.get(
                    TELEGRAM_SEND_MESSAGE_URL.format(
                        TOKEN, chat_id,
                        str(i) + ")" + " " + question + "\n"))
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, chat_id,
                    "If you can't find your question, write 'none'"))
        else:
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, chat_id, "No similar questions were asked,"
                    " we will send your question to the teacher. Please wait for the answer.\n"
                ))
            query = f"INSERT INTO parentsQuestions VALUES({chat_id},'{first_question}')"
            cursor.execute(query)
            ask_question2(first_question, class_)
        connection.commit()
    return chat_id, first_question, similar_questions
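
The nested matching loops above reduce to a set-intersection test; a compact equivalent sketch:

split_keywords = set(keywords.split())
similar_questions = [r['question'] for r in res
                     if split_keywords & set(r['keywords'].split())]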
Example #15
    def get_definition(self):
        """
        Returns the definitions of the path under the cursor. This is
        not a goto function! This follows complicated paths and returns the
        end, not the first definition.
        The big difference of goto and get_definition is that goto doesn't
        follow imports and statements.
        Multiple objects may be returned, because Python itself is a dynamic
        language, which means depending on an option you can have two different
        versions of a function.

        :return: list of Definition objects, which are basically scopes.
        :rtype: list
        """
        def resolve_import_paths(scopes):
            for s in scopes.copy():
                if isinstance(s, imports.ImportPath):
                    scopes.remove(s)
                    scopes.update(resolve_import_paths(set(s.follow())))
            return scopes

        goto_path = self.module.get_path_under_cursor()

        context = self.module.get_context()
        if next(context) in ('class', 'def'):
            scopes = set([self.module.parser.user_scope])
        elif not goto_path:
            op = self.module.get_operator_under_cursor()
            scopes = set([keywords.get_operator(op, self.pos)] if op else [])
        else:
            scopes = set(self._prepare_goto(goto_path))

        scopes = resolve_import_paths(scopes)

        # add keywords
        scopes |= keywords.get_keywords(string=goto_path, pos=self.pos)

        d = set([
            api_classes.Definition(s) for s in scopes
            if not isinstance(s, imports.ImportPath._GlobalNamespace)
        ])
        return sorted(d, key=lambda x: (x.module_path, x.start_pos))
Example #16
File: api.py Project: omab/dotfiles
    def get_definition(self):
        """
        Returns the definitions of the path under the cursor. This is
        not a goto function! This follows complicated paths and returns the
        end, not the first definition.
        The big difference of goto and get_definition is that goto doesn't
        follow imports and statements.
        Multiple objects may be returned, because Python itself is a dynamic
        language, which means depending on an option you can have two different
        versions of a function.

        :return: list of Definition objects, which are basically scopes.
        :rtype: list
        """
        def resolve_import_paths(scopes):
            for s in scopes.copy():
                if isinstance(s, imports.ImportPath):
                    scopes.remove(s)
                    scopes.update(resolve_import_paths(set(s.follow())))
            return scopes

        goto_path = self.module.get_path_under_cursor()

        context = self.module.get_context()
        if next(context) in ('class', 'def'):
            scopes = set([self.module.parser.user_scope])
        elif not goto_path:
            op = self.module.get_operator_under_cursor()
            scopes = set([keywords.get_operator(op, self.pos)] if op else [])
        else:
            scopes = set(self._prepare_goto(goto_path))

        scopes = resolve_import_paths(scopes)

        # add keywords
        scopes |= keywords.get_keywords(string=goto_path, pos=self.pos)

        d = set([api_classes.Definition(s) for s in scopes
                    if not isinstance(s, imports.ImportPath._GlobalNamespace)])
        return sorted(d, key=lambda x: (x.module_path, x.start_pos))
Example #17
def build_model004(df):
    from keywords import get_keywords

    x_cols = ['title', 'abstract']

    df_train, df_test = split_train_test(df)

    X, y = getXy(df_train, x_cols)
    X_test, _ = getXy(df_test, x_cols)

    keywords = get_keywords(50)
    print('keywords=%s' % keywords)

    print('X before=%s:%s' % (list(X.shape), X.columns))

    X = add_keywords(X, 'title', keywords['title'])
    X = add_keywords(X, 'abstract', keywords['abstract'])
    X_test = add_keywords(X_test, 'title', keywords['title'])
    X_test = add_keywords(X_test, 'abstract', keywords['abstract'])

    print('X after =%s:%s' % (list(X.shape), X.columns))

    return X, y, X_test
Example #18
 def get_award(self, file_path):
     print "Converting xml file: " + file_path
     
     tree = ET.parse(file_path)
     root = tree.getroot()
     award = root.find('Award')
     
     # create a new AwardItem object from the xml data 
     awardItem = AwardItem()
             
     awardItem['Title'] = award.find('AwardTitle').text
     awardItem['Date'] = award.find('AwardEffectiveDate').text
     
     amount = award.find('AwardAmount')        
     if amount is not None:
         awardItem['Amount'] = amount.text
     
     # add institution
     institution = award.find('Institution')
     if institution is not None:        
         awardItem['Institution'] = institution.find('Name').text
     
     # add the authors
     division = award.find('Division')
     if division is not None:
         awardItem['Division'] = division.find('LongName').text
     
     investigator = award.find('Investigator')
     if investigator is not None:        
         awardItem['Investigator'] = investigator.find('FirstName').text + " " + investigator.find('LastName').text + " | " + investigator.find('EmailAddress').text
     
     abstract = award.find('AbstractNarration')
     if abstract is not None and abstract.text is not None:
         awardItem['Abstract'] = abstract.text
         awardItem['Keywords'] = get_keywords(abstract.text)      
     
     return awardItem            
Example #19
def answer_the_last_question(answer, teacher_chat_id):
    with connection.cursor() as cursor:
        query = "SELECT * FROM parentsQuestionsQueue LIMIT 1"
        cursor.execute(query)
        result = cursor.fetchone()
        if result is not None:
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, result['chat_id'],
                    "The answer for your last question is :\n" + answer))
            answer = "'" + answer + "'"
            ques = "'" + result['question'] + "'"
            lques = [result['question']]
            keywords = get_keywords(lques)
            query2 = f"INSERT INTO QA VALUES({teacher_chat_id}, {answer}, {ques}, '{keywords}')"
            cursor.execute(query2)
            query3 = f"DELETE FROM parentsQuestionsQueue WHERE question = {ques}"
            cursor.execute(query3)
            connection.commit()
        else:
            requests.get(
                TELEGRAM_SEND_MESSAGE_URL.format(
                    TOKEN, teacher_chat_id,
                    "THERE IS NO QUESTION TO ANSWER!!"))
Example #20
    def on_data(self, data):
        try:
            json_data = json.loads(data)
            json_send_data = self.json_filter(json_data)

            json_send_data['senti_val'] = afinn.score(json_data['text'])
            json_send_data['sentiment'] = self.fun(
                afinn.score(json_data['text']))
            json_send_data['keywords'] = keywords.get_keywords(
                json_data['text']) + keywords.extract_hashtag(
                    json_data['text'])
            print(json_send_data['text'], " >>>>>>>> ",
                  json_send_data['keywords'], " >>>>>>>> ",
                  json_send_data['sentiment'])

            self.producer.send_messages(b'twitter', json.dumps(json_send_data))

            client_mongo = pymongo.MongoClient('localhost', 27017)
            db = client_mongo['dicdatabase']
            db.test.insert_one(json_send_data)

            return True
        except KeyError:
            return True
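
Opening a new MongoClient for every incoming tweet is expensive; a sketch that hoists the connection into the listener's constructor (hypothetical __init__; the class presumably already receives or builds self.producer):

def __init__(self, producer):
    self.producer = producer
    # One connection reused for the whole stream, not one per message.
    self.db = pymongo.MongoClient('localhost', 27017)['dicdatabase']

on_data would then call self.db.test.insert_one(json_send_data).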
Example #21
        default="ptrs",
        choices=["ptrs", "make_ie_data", "prep_gen_data"],
        help="what utility function to run",
    )
    parser.add_argument("--min-freq", type=int, default=0)
    parser.add_argument("--test", action="store_true", help="use test data")
    parser.add_argument("--lang",
                        type=str,
                        choices=["en", "de"],
                        help="language",
                        default="de")

    args = parser.parse_args()

    LANG = args.lang
    Keywords = get_keywords(LANG)

    if args.mode == "ptrs":
        make_pointerfi(args.output_fi, inp_file=args.input_path)
    elif args.mode == "make_ie_data":
        save_full_sent_data(
            args.output_fi,
            path=args.input_path,
            multilabel_train=True,
            min_freq=args.min_freq,
        )
    elif args.mode == "prep_gen_data":
        prep_generated_data(
            args.gen_fi,
            args.dict_pfx,
            args.output_fi,
Example #22
def kws():
    text = request.args.get('text', None)
    return json.dumps({'keywords': keywords.get_keywords(text)})
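
A hedged usage sketch for this route (host, port, and path are assumptions):

import requests

resp = requests.get("http://localhost:5000/kws",
                    params={"text": "tweets about tulips"})
print(resp.json()["keywords"])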
Example #23
    def completions(self):
        """
        Return :class:`api_classes.Completion` objects. Those objects contain
        information about the completions, more than just names.

        :return: Completion objects, sorted by name and __ comes last.
        :rtype: list of :class:`api_classes.Completion`
        """
        debug.speed('completions start')
        path = self._module.get_path_until_cursor()
        if re.search(r'^\.|\.\.$', path):
            return []
        path, dot, like = self._get_completion_parts(path)
        completion_line = self._module.get_line(self.pos[0])[:self.pos[1]]

        try:
            scopes = list(self._prepare_goto(path, True))
        except NotFoundError:
            scopes = []
            scope_generator = evaluate.get_names_of_scope(
                                            self._parser.user_scope, self.pos)
            completions = []
            for scope, name_list in scope_generator:
                for c in name_list:
                    completions.append((c, scope))
        else:
            completions = []
            debug.dbg('possible scopes', scopes)
            for s in scopes:
                if s.isinstance(er.Function):
                    names = s.get_magic_method_names()
                else:
                    if isinstance(s, imports.ImportPath):
                        if like == 'import':
                            if not completion_line.endswith('import import'):
                                continue
                        a = s.import_stmt.alias
                        if a and a.start_pos <= self.pos <= a.end_pos:
                            continue
                        names = s.get_defined_names(on_import_stmt=True)
                    else:
                        names = s.get_defined_names()

                for c in names:
                    completions.append((c, s))

        if not dot:  # named params have no dots
            for call_def in self.call_signatures():
                if not call_def.module.is_builtin():
                    for p in call_def.params:
                        completions.append((p.get_name(), p))

            # Do the completion if there is no path before and no import stmt.
            u = self._parser.user_stmt
            bs = builtin.Builtin.scope
            if isinstance(u, pr.Import):
                if (u.relative_count > 0 or u.from_ns) and not re.search(
                            r'(,|from)\s*$|import\s+$', completion_line):
                    completions += ((k, bs) for k
                                            in keywords.get_keywords('import'))

            if not path and not isinstance(u, pr.Import):
                # add keywords
                completions += ((k, bs) for k in keywords.get_keywords(
                                                                    all=True))

        needs_dot = not dot and path

        comps = []
        comp_dct = {}
        for c, s in set(completions):
            n = c.names[-1]
            if settings.case_insensitive_completion \
                    and n.lower().startswith(like.lower()) \
                    or n.startswith(like):
                if not evaluate.filter_private_variable(s,
                                                    self._parser.user_stmt, n):
                    new = api_classes.Completion(c, needs_dot,
                                                    len(like), s)
                    k = (new.name, new.complete)  # key
                    if k in comp_dct and settings.no_completion_duplicates:
                        comp_dct[k]._same_name_completions.append(new)
                    else:
                        comp_dct[k] = new
                        comps.append(new)

        debug.speed('completions end')

        return sorted(comps, key=lambda x: (x.name.startswith('__'),
                                            x.name.startswith('_'),
                                            x.name.lower()))
Example #24
    def goto_definitions(self):
        """
        Return the definitions of the path under the cursor. This is not a goto function!
        This follows complicated paths and returns the end, not the first
        definition. The big difference between :meth:`goto_assignments` and
        :meth:`goto_definitions` is that :meth:`goto_assignments` doesn't
        follow imports and statements. Multiple objects may be returned,
        because Python itself is a dynamic language, which means depending on
        an option you can have two different versions of a function.

        :rtype: list of :class:`api_classes.Definition`
        """
        def resolve_import_paths(scopes):
            for s in scopes.copy():
                if isinstance(s, imports.ImportPath):
                    scopes.remove(s)
                    scopes.update(resolve_import_paths(set(s.follow())))
            return scopes

        goto_path = self._module.get_path_under_cursor()

        context = self._module.get_context()
        scopes = set()
        lower_priority_operators = ('()', '(', ',')
        """Operators that could hide callee."""
        if next(context) in ('class', 'def'):
            scopes = set([self._module.parser.user_scope])
        elif not goto_path:
            op = self._module.get_operator_under_cursor()
            if op and op not in lower_priority_operators:
                scopes = set([keywords.get_operator(op, self.pos)])

        # Fetch definition of callee
        if not goto_path:
            (call, _) = self._func_call_and_param_index()
            if call is not None:
                while call.next is not None:
                    call = call.next
                # reset cursor position:
                (row, col) = call.name.end_pos
                self.pos = (row, max(col - 1, 0))
                self._module = modules.ModuleWithCursor(self._source_path,
                                                        source=self.source,
                                                        position=self.pos)
                # then try to find the path again
                goto_path = self._module.get_path_under_cursor()

        if not scopes:
            if goto_path:
                scopes = set(self._prepare_goto(goto_path))
            elif op in lower_priority_operators:
                scopes = set([keywords.get_operator(op, self.pos)])

        scopes = resolve_import_paths(scopes)

        # add keywords
        scopes |= keywords.get_keywords(string=goto_path, pos=self.pos)

        d = set([
            api_classes.Definition(s) for s in scopes
            if not isinstance(s, imports.ImportPath._GlobalNamespace)
        ])
        return self._sorted_defs(d)
Example #25
File: api.py Project: omab/dotfiles
    def complete(self):
        """
        An auto completer for python files.

        :return: list of Completion objects, sorted by name and __ comes last.
        :rtype: list
        """
        def follow_imports_if_possible(name):
            # TODO remove this, or move to another place (not used)
            par = name.parent
            if isinstance(par, parsing.Import) and not \
                        isinstance(self.parser.user_stmt, parsing.Import):
                new = imports.ImportPath(par).follow(is_goto=True)
                # Only remove the old entry if a new one has been found.
                #print par, new, par.parent
                if new:
                    try:
                        return new
                    except AttributeError:  # .name undefined
                        pass
            return [name]

        debug.speed('complete start')
        path = self.module.get_path_until_cursor()
        if re.search(r'^\.|\.\.$', path):
            return []
        path, dot, like = self._get_completion_parts(path)

        try:
            scopes = list(self._prepare_goto(path, True))
        except NotFoundError:
            scopes = []
            scope_generator = evaluate.get_names_for_scope(
                                            self.parser.user_scope, self.pos)
            completions = []
            for scope, name_list in scope_generator:
                for c in name_list:
                    completions.append((c, scope))
        else:
            completions = []
            debug.dbg('possible scopes', scopes)
            for s in scopes:
                if s.isinstance(evaluate.Function):
                    names = s.get_magic_method_names()
                else:
                    if isinstance(s, imports.ImportPath):
                        if like == 'import':
                            l = self.module.get_line(self.pos[0])[:self.pos[1]]
                            if not l.endswith('import import'):
                                continue
                        names = s.get_defined_names(on_import_stmt=True)
                    else:
                        names = s.get_defined_names()

                for c in names:
                    completions.append((c, s))

        if not dot:  # named_params have no dots
            call_def = self.get_in_function_call()
            if call_def:
                if not call_def.module.is_builtin():
                    for p in call_def.params:
                        completions.append((p.get_name(), p))

            # Do the completion if there is no path before and no import stmt.
            if (not scopes or not isinstance(scopes[0], imports.ImportPath)) \
                        and not path:
                # add keywords
                bs = builtin.Builtin.scope
                completions += ((k, bs) for k in keywords.get_keywords(
                                                                    all=True))

        needs_dot = not dot and path

        comps = []
        for c, s in set(completions):
            n = c.names[-1]
            if settings.case_insensitive_completion \
                    and n.lower().startswith(like.lower()) \
                    or n.startswith(like):
                if not evaluate.filter_private_variable(s,
                                                    self.parser.user_stmt, n):
                    new = api_classes.Completion(c, needs_dot,
                                                    len(like), s)
                    comps.append(new)

        debug.speed('complete end')

        return sorted(comps, key=lambda x: (x.word.startswith('__'),
                                            x.word.startswith('_'),
                                            x.word.lower()))
Example #26
""""
Utility for converting JSON-formatted review data into tabular form.
"""
import json
from tqdm import tqdm
from keywords import get_keywords

file_name = "Clothing_Shoes_and_Jewelry_5.json"
col_names = ["id", "text", "rating"]
tsv_name = file_name.replace("json", "tsv")
output = open(tsv_name, 'w')
output.write("\t".join(["id", "rating", "keyword", "text"]))
output.close()

i = 0
with open(file_name) as f:
    for line in tqdm(f, desc="Reading from JSON"):
        # do stuff with this one line
        review = json.loads(line)
        text = review["reviewText"]
        rating = review["overall"]
        keyword = get_keywords(text)
        output = open(tsv_name, 'a')
        output.write("\n")
        output.write("\t".join([str(i), str(rating), keyword, text]))
        output.close()
        i += 1
        # for smaller sets, use:
        # if i >= 25:
        #     break
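
Reopening the TSV once per record makes the loop needlessly slow; a sketch of the same conversion holding a single output handle open (assuming get_keywords returns a tab-free string):

import json
from tqdm import tqdm
from keywords import get_keywords

with open(file_name) as f, open(tsv_name, 'w') as out:
    out.write("\t".join(["id", "rating", "keyword", "text"]))
    for i, line in enumerate(tqdm(f, desc="Reading from JSON")):
        review = json.loads(line)
        out.write("\n" + "\t".join([str(i), str(review["overall"]),
                                    get_keywords(review["reviewText"]),
                                    review["reviewText"]]))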
Example #27
def produce_v2_vector_row(rid, result, doctype, keyword_coordinates,
                          found_entities):

    if "ENTITY TYPE" not in result:
        return None

    found_entity_type = resolve_entity_type(result, found_entities)
    entity_type = result.get("CHILD ENTITY TYPE", result.get("ENTITY TYPE"))

    keywords = get_keywords(doctype)
    row = get_empty_row(keywords)

    datapoint = result["value"]["value"]

    row["rid"] = rid
    row["label"] = result["NAME"]
    page_num = get_attr_list(["selection_input", "page", "document_index"],
                             result)
    page_num += 1
    page_name = f"page_{page_num}"

    if page_name not in keyword_coordinates:
        page_name = list(keyword_coordinates.keys())[0]

    keywords_on_the_page = keyword_coordinates[page_name]
    x_normal, y_normal = (
        keywords_on_the_page["page_size"]["x"],
        keywords_on_the_page["page_size"]["y"],
    )

    center_coordinates = {"x0": 0, "y0": 0, "x1": x_normal, "y1": y_normal}

    row["entity_id"] = None
    row["page_number"] = float(page_num)
    row["entity_type"] = entity_type
    row["doctype"] = doctype

    row["currency"] = coerce_currency(datapoint, row["entity_type"])

    row["date"] = dateable(row["datapoint"], common_date_patterns,
                           row["entity_type"])

    datapoint_coordinates = get_attr_list(["selection_input", "pos_original"],
                                          result)

    # No coordinates? No vector
    if datapoint_coordinates is None:
        return None

    distance_af_center = calculate_distance_and_angle(center_coordinates,
                                                      datapoint_coordinates,
                                                      x_normal, y_normal)

    row["df_center"] = distance_af_center["distance"]
    row["af_center"] = distance_af_center["angle"]

    for keyword_value, keyword_coordinates_lst in keywords_on_the_page[
            "keyword_coordinates"].items():
        vector_keyword = find_closest_coordinates(
            keyword_coordinates_lst,
            datapoint_coordinates,
            x_normal,
            y_normal,
        )
        row[f"dfk_{keyword_value}"] = vector_keyword["distance"]
        row[f"afk_{keyword_value}"] = vector_keyword["angle"]

    return row
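
A plausible sketch of the get_empty_row helper the produce_*_row functions share (hypothetical; inferred from the fields they fill in, the real implementation may differ):

def get_empty_row(keywords):
    row = {"rid": None, "label": None, "entity_id": None,
           "page_number": None, "entity_type": None, "doctype": None,
           "currency": None, "date": None,
           "df_center": None, "af_center": None}
    # One distance/angle column pair per doctype keyword.
    for kw in keywords:
        row[f"dfk_{kw}"] = None
        row[f"afk_{kw}"] = None
    return row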
Example #28
def produce_tagged_vector_row(rid, last_row, doctype, keyword_coordinates):
    page_name = get_default_page_name(last_row["page_number"],
                                      list(keyword_coordinates.keys()))
    keywords = get_keywords(doctype)
    row = get_empty_row(keywords)

    if page_name not in keyword_coordinates:
        page_name = list(keyword_coordinates.keys())[0]

    keywords_on_the_page = keyword_coordinates[page_name]
    x_normal, y_normal = (
        keywords_on_the_page["page_size"]["x"],
        keywords_on_the_page["page_size"]["y"],
    )
    center_coordinates = {"x0": 0, "y0": 0, "x1": x_normal, "y1": y_normal}

    datapoint = last_row["content"]

    row["rid"] = rid
    row["label"] = last_row["datapoint"]
    row["entity_id"] = last_row["id"]
    row["page_number"] = float(page_name.replace("page_", ""))
    row["doctype"] = doctype
    row["entity_type"] = last_row["entity_type"]

    row["currency"] = coerce_currency(last_row["content"], row["entity_type"])
    row["date"] = dateable(last_row["content"], common_date_patterns,
                           row["entity_type"])

    if not isinstance(last_row["coordinates"], str):
        return False

    x0, y0, x1, y1 = tuple(
        float(x) for x in last_row["coordinates"].split(" "))

    datapoint_coordinates = {"y0": y0, "x0": x0, "y1": y1, "x1": x1}

    distance_af_center = calculate_distance_and_angle(center_coordinates,
                                                      datapoint_coordinates,
                                                      x_normal, y_normal)

    row["df_center"] = distance_af_center["distance"]
    row["af_center"] = distance_af_center["angle"]

    for keyword_value, keyword_coordinates_lst in keywords_on_the_page[
            "keyword_coordinates"].items():
        vector_keyword = find_closest_coordinates(
            keyword_coordinates_lst,
            datapoint_coordinates,
            x_normal,
            y_normal,
        )
        row[f"dfk_{keyword_value}"] = vector_keyword["distance"]
        row[f"afk_{keyword_value}"] = vector_keyword["angle"]

    return row
Example #29
def produce_found_vector_row(rid,
                             entity,
                             doctype,
                             keyword_coordinates,
                             label=0):
    addr_mapper = {
        "AddressNumber": "ROUTE",
        "StreetName": "ROUTE",
        "StreetNamePostType": "ROUTE",
        "StreetNamePreDirectional": "ROUTE",
        "PlaceName": "CITY",
        "StateName": "STATE",
        "ZipCode": "ZIP",
    }
    datapoint = entity["CONTENT"]["CONTENT"]["value"]
    entity_type = entity["ENTITY_TYPE"]
    if entity_type == "ADDRESS":
        parsed_address = usaddress.parse(datapoint)
        ent_type_set = list(
            set(addr_mapper.get(tup[1], "ADDRESS") for tup in parsed_address))
        if len(ent_type_set) == 1:
            entity_type = ent_type_set[0]
    if entity_type == "ADDRESS":
        print("# ", datapoint)

    keywords = get_keywords(doctype)
    row = get_empty_row(keywords)
    page_num = entity["PAGE"]

    page_name = f"page_{page_num}"
    if page_name not in keyword_coordinates:
        page_name = list(keyword_coordinates.keys())[0]

    keywords_on_the_page = keyword_coordinates[page_name]
    x_normal, y_normal = (
        keywords_on_the_page["page_size"]["x"],
        keywords_on_the_page["page_size"]["y"],
    )

    center_coordinates = {"x0": 0, "y0": 0, "x1": x_normal, "y1": y_normal}

    row["rid"] = rid
    row["label"] = label
    row["entity_id"] = entity["ENTITY_ID"]
    row["page_number"] = float(page_num)
    row["entity_type"] = entity_type
    row["doctype"] = doctype
    row["currency"] = coerce_currency(datapoint, entity_type)
    row["date"] = dateable(entity["CONTENT"]["CONTENT"]["value"],
                           common_date_patterns, entity_type)

    # Try to match up with tagged entities
    datapoint_coordinates = entity["CONTENT"]["CONTENT"]["coordinates"]

    distance_af_center = calculate_distance_and_angle(center_coordinates,
                                                      datapoint_coordinates,
                                                      x_normal, y_normal)
    row["df_center"], row["af_center"] = (
        distance_af_center["distance"],
        distance_af_center["angle"],
    )
    for keyword_value, keyword_coordinates_lst in keywords_on_the_page[
            "keyword_coordinates"].items():
        vector_keyword = find_closest_coordinates(
            keyword_coordinates_lst,
            datapoint_coordinates,
            x_normal,
            y_normal,
        )
        row[f"dfk_{keyword_value}"], row[f"afk_{keyword_value}"] = (
            vector_keyword["distance"],
            vector_keyword["angle"],
        )
    return row
Example #30
    def completions(self):
        """
        Return :class:`api_classes.Completion` objects. Those objects contain
        information about the completions, more than just names.

        :return: Completion objects, sorted by name and __ comes last.
        :rtype: list of :class:`api_classes.Completion`
        """
        debug.speed('completions start')
        path = self._module.get_path_until_cursor()
        if re.search(r'^\.|\.\.$', path):
            return []
        path, dot, like = self._get_completion_parts(path)
        completion_line = self._module.get_line(self.pos[0])[:self.pos[1]]

        try:
            scopes = list(self._prepare_goto(path, True))
        except NotFoundError:
            scopes = []
            scope_generator = evaluate.get_names_of_scope(
                self._parser.user_scope, self.pos)
            completions = []
            for scope, name_list in scope_generator:
                for c in name_list:
                    completions.append((c, scope))
        else:
            completions = []
            debug.dbg('possible scopes', scopes)
            for s in scopes:
                if s.isinstance(er.Function):
                    names = s.get_magic_method_names()
                else:
                    if isinstance(s, imports.ImportPath):
                        if like == 'import':
                            if not completion_line.endswith('import import'):
                                continue
                        a = s.import_stmt.alias
                        if a and a.start_pos <= self.pos <= a.end_pos:
                            continue
                        names = s.get_defined_names(on_import_stmt=True)
                    else:
                        names = s.get_defined_names()

                for c in names:
                    completions.append((c, s))

        if not dot:  # named params have no dots
            for call_def in self.call_signatures():
                if not call_def.module.is_builtin():
                    for p in call_def.params:
                        completions.append((p.get_name(), p))

            # Do the completion if there is no path before and no import stmt.
            u = self._parser.user_stmt
            bs = builtin.Builtin.scope
            if isinstance(u, pr.Import):
                if (u.relative_count > 0 or u.from_ns) and not re.search(
                        r'(,|from)\s*$|import\s+$', completion_line):
                    completions += ((k, bs)
                                    for k in keywords.get_keywords('import'))

            if not path and not isinstance(u, pr.Import):
                # add keywords
                completions += ((k, bs)
                                for k in keywords.get_keywords(all=True))

        needs_dot = not dot and path

        comps = []
        comp_dct = {}
        for c, s in set(completions):
            n = c.names[-1]
            if settings.case_insensitive_completion \
                    and n.lower().startswith(like.lower()) \
                    or n.startswith(like):
                if not evaluate.filter_private_variable(
                        s, self._parser.user_stmt, n):
                    new = api_classes.Completion(c, needs_dot, len(like), s)
                    k = (new.name, new.complete)  # key
                    if k in comp_dct and settings.no_completion_duplicates:
                        comp_dct[k]._same_name_completions.append(new)
                    else:
                        comp_dct[k] = new
                        comps.append(new)

        debug.speed('completions end')

        return sorted(
            comps,
            key=lambda x:
            (x.name.startswith('__'), x.name.startswith('_'), x.name.lower()))
Example #31
from datetime import datetime, timedelta
from time import time

import falcon
import ujson as json

from keywords import get_db, get_keywords
from hortiradar import admins, users, time_format
from hortiradar.database import stop_words
from hortiradar.clustering import Config


db = get_db()
tweets = db.tweets
groups = db.groups

KEYWORDS = get_keywords(local=True)
keywords_sync_time = time()

spam_level = Config.getfloat("database:parameters", "spam_level")

def get_dates(req, resp, resource, params):
    """Parse the `start` and `end` datetime parameters."""
    try:
        today = datetime.today()
        today = datetime(today.year, today.month, today.day)
        start = req.get_param("start")
        if start:
            start = datetime.strptime(start, time_format)
        else:
            start = today - timedelta(days=1)
        end = req.get_param("end")
Example #32
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from keywords import get_keywords, save_keyword_index
import re
import urllib
import time

if __name__ == '__main__':
    browser = webdriver.PhantomJS()
    try:
        keyword_list = get_keywords()
        for k in keyword_list:
            keyword = k[1].decode('utf8')
            keyword = urllib.quote(keyword.encode('utf8'))
            browser.get(
                'http://zhishu.sogou.com/index/searchHeat?kwdNamesStr=%s&timePeriodType=MONTH&dataType=SEARCH_ALL&queryType=INPUT'
                % keyword)
            try:
                r = re.findall(
                    r'root.SG.data = {"pvList":\[([\s\S]*)],"infoList"',
                    browser.page_source, re.M)
            except Exception:
                r = ''
            if r:
                points = eval(r[0].split('],"infoList"')[0])
                for p in points:
                    date = str(p['date'])
                    date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
Example #33
    def complete(self):
        """
        An auto completer for python files.

        :return: list of Completion objects, sorted by name and __ comes last.
        :rtype: list
        """
        def follow_imports_if_possible(name):
            # TODO remove this, or move to another place (not used)
            par = name.parent
            if isinstance(par, parsing.Import) and not \
                        isinstance(self.parser.user_stmt, parsing.Import):
                new = imports.ImportPath(par).follow(is_goto=True)
                # Only remove the old entry if a new one has been found.
                #print par, new, par.parent
                if new:
                    try:
                        return new
                    except AttributeError:  # .name undefined
                        pass
            return [name]

        debug.speed('complete start')
        path = self.module.get_path_until_cursor()
        path, dot, like = self._get_completion_parts(path)

        try:
            scopes = list(self._prepare_goto(path, True))
        except NotFoundError:
            scopes = []
            scope_generator = evaluate.get_names_for_scope(
                self.parser.user_scope, self.pos)
            completions = []
            for scope, name_list in scope_generator:
                for c in name_list:
                    completions.append((c, scope))
        else:
            completions = []
            debug.dbg('possible scopes', scopes)
            for s in scopes:
                if s.isinstance(evaluate.Function):
                    names = s.get_magic_method_names()
                else:
                    if isinstance(s, imports.ImportPath):
                        if like == 'import':
                            l = self.module.get_line(self.pos[0])[:self.pos[1]]
                            if not l.endswith('import import'):
                                continue
                        names = s.get_defined_names(on_import_stmt=True)
                    else:
                        names = s.get_defined_names()

                for c in names:
                    completions.append((c, s))

        if not dot:  # named_params have no dots
            call_def = self.get_in_function_call()
            if call_def:
                if not call_def.module.is_builtin():
                    for p in call_def.params:
                        completions.append((p.get_name(), p))

            # Do the completion if there is no path before and no import stmt.
            if (not scopes or not isinstance(scopes[0], imports.ImportPath)) \
                        and not path:
                # add keywords
                bs = builtin.Builtin.scope
                completions += ((k, bs)
                                for k in keywords.get_keywords(all=True))

        needs_dot = not dot and path

        comps = []
        for c, s in set(completions):
            n = c.names[-1]
            if settings.case_insensitive_completion \
                    and n.lower().startswith(like.lower()) \
                    or n.startswith(like):
                if not evaluate.filter_private_variable(
                        s, self.parser.user_stmt, n):
                    new = api_classes.Completion(c, needs_dot, len(like), s)
                    comps.append(new)

        debug.speed('complete end')

        return sorted(
            comps,
            key=lambda x:
            (x.word.startswith('__'), x.word.startswith('_'), x.word.lower()))
Example #34
def main():
    #just for testing
    print("hi")

    #joe_nouns = (get_words_by_pos('joe-biden', 'noun'))
    #print(joe_nouns)

    #joe_verbs = (get_words_by_pos('joe-biden', 'verb'))

    #joe_adjectives=(get_words_by_pos('joe-biden', 'adjective'))

    #joe_adverbs=(get_words_by_pos('joe-biden', 'adverb'))

    #elizabeth_nouns = open("elizabeth-nouns.txt", "w")
    #elizabeth_nouns=(get_words_by_pos('elizabeth-warren', 'noun'))
    #elizabeth_nouns.close()

    #lizabeth_verbs = open("elizabeth-verbs.txt", "w")
    #elizabeth_verbs=(get_words_by_pos('elizabeth-warren', 'verb'))
    #elizabeth_verbs.close()

    #elizabeth_adjectives = open("elizabeth-adjs.txt", "w")
    #elizabeth_adjectives=(get_words_by_pos('elizabeth-warren', 'adjective'))
    #elizabeth_adjectives.close()

    #lizabeth_adverbs = open("elizabeth-adverbs.txt", "w")
    #elizabeth_adverbs=(get_words_by_pos('elizabeth-warren', 'adverb'))
    #elizabeth_adverbs.close()

    #bernie_nouns = open("bernie-nouns.txt", "w")
    #bernie_nouns=(get_words_by_pos('bernie-sanders', 'noun'))
    #bernie_nouns.close()

    #bernie_verbs = open("bernie-verbs.txt", "w")
    #bernie_verbs=(get_words_by_pos('bernie-sanders', 'verb'))
    #bernie_verbs.close()

    #bernie_adjectives = open("bernie-adjs.txt", "w")
    #bernie_adjectives=(get_words_by_pos('bernie-sanders', 'adjective'))
    #bernie_adjectives.close()

    #bernie_adverbs = open("bernie-adverbs.txt", "w")
    #bernie_adverbs=(get_words_by_pos('bernie-sanders', 'adverb'))
    #bernie_adverbs.close()

    #pete_nouns = open("pete-nouns.txt", "w")
    #pete_nouns=(get_words_by_pos('pete-buttigieg', 'noun'))
    #pete_nouns.close()

    #pete_verbs = open("pete-verbs.txt", "w")
    #pete_verbs=(get_words_by_pos('pete-buttigieg', 'verb'))
    #pete_verbs.close()

    #bernie_adjectives = open("pete-adjs.txt", "w")
    #pete_adjectives=(get_words_by_pos('pete-buttigieg', 'adjective'))
    #

    #pete_adverbs = open("pete-adverbs.txt", "w")
    #pete_adverbs=(get_words_by_pos('pete-buttigieg', 'adverb'))
    #pete_adverbs.close()

    #amy_nouns = open("amy-nouns.txt", "w")
    #amy_nouns=(get_words_by_pos('amy-klobuchar', 'noun'))
    #amy_nouns.close()

    #amy_verbs = open("amy-verbs.txt", "w")
    #amy_verbs=(get_words_by_pos('amy-klobuchar', 'verb'))
    #amy_verbs.close()

    #amy_adjectives = open("amy-adjs.txt", "w")
    #amy_adjectives=(get_words_by_pos('amy-klobuchar', 'adjective'))
    #amy_adjectives.close()

    #amy_adverbs = open("amy-adverbs.txt", "w")
    #amy_adverbs=(get_words_by_pos('amy-klobuchar', 'adverb'))
    #amy_adverbs.close()

    #donald_nouns = open("donald-nouns.txt", "w")
    #donald_nouns=(get_words_by_pos('donald-trump', 'noun'))
    #donald_nouns.close()

    #onald_verbs = open("donald-verbs.txt", "w")
    #donald_verbs=(get_words_by_pos('donald-trump', 'verb'))
    #donald_verbs.close()

    #donald_adjectives = open("donald-adjs.txt", "w")
    #donald_adjectives=(get_words_by_pos('donald-trump', 'adjective'))
    #donald_adjectives.close()

    #donald_adverbs = open("donald-adverbs.txt", "w")
    #donald_adverbs=(get_words_by_pos('donald-trump', 'adverb'))
    #donald_adverbs.close()
    #print(get_most_frequent_words(joe_nouns, 10))
    #print(joe_adjectives)
    #print(get_most_frequent_words(donald_adjectives, 10))
    print('bernie')
    print(map_occurances('bernie-sanders', keywords.get_keywords()))
    print('joe')
    print(map_occurances('joe-biden', keywords.get_keywords()))
    print('donald')
    print(map_occurances('donald-trump', keywords.get_keywords()))
    print('elizabeth-warren')
    print(map_occurances('elizabeth-warren', keywords.get_keywords()))
    print('pete-buttigieg')
    print(map_occurances('pete-buttigieg', keywords.get_keywords()))
    print('amy-klobuchar')
    print(map_occurances('amy-klobuchar', keywords.get_keywords()))
Example #35
def analyze_text_block(text,
                       sentiment_library="textblob",
                       entity_library="spacy",
                       get_sentiment_per_topic=True):
    text = re.sub(r'\s+', ' ', text)
    text = text.replace("\n", ' ')

    entities_res = entities.get_entities(text, library=entity_library)
    keywords_res = keywords.get_keywords(text)
    sentiment_res = sentiment.get_sentiment(text, library=sentiment_library)
    lemmas_dict = {}

    # Calculate sentiment per lemmas, keywords and entities, by averaging
    # the sentiment for all the sentences that they appear in:
    if get_sentiment_per_topic:
        blob = TextBlob(text)
        for sentence in blob.sentences:
            sentence_score = sentiment.get_sentiment(
                str(sentence), library=sentiment_library)['sentiment.score']

            sentence_lemmas = lemmas.get_lemmas(sentence)

            sentence = str(sentence).lower()

            for lemma in sentence_lemmas:
                lemmatxt = lemma['text']
                if lemmatxt in lemmas_dict.keys():
                    lemmas_dict[lemmatxt]['sentiment.score'].append(
                        sentence_score)
                else:
                    lemmas_dict[lemmatxt] = {
                        'sentiment.score': [sentence_score]
                    }

            for keyword in keywords_res:
                word = keyword['text']
                if word.lower() in sentence:
                    if 'sentiment.score' not in keyword.keys():
                        keyword['sentiment.score'] = []
                    keyword['sentiment.score'].append(sentence_score)

            for entity in entities_res:
                word = entity['text']
                if word.lower() in sentence:
                    if 'sentiment.score' not in entity.keys():
                        entity['sentiment.score'] = []
                    entity['sentiment.score'].append(sentence_score)

        for keyword in keywords_res:
            # WARNING: This is a hack. Happens when we have different libraries not agreeing on sentence boundaries!
            if 'sentiment.score' not in keyword.keys():
                keyword['sentiment.score'] = [sentiment_res['sentiment.score']]

            keyword['num.sentences'] = len(keyword['sentiment.score'])
            keyword['sentiment.score'] = np.mean(keyword['sentiment.score'])

        for entity in entities_res:
            # WARNING: This is a hack. Happens when we have different libraries not agreeing on sentence boundaries!
            if 'sentiment.score' not in entity.keys():
                entity['sentiment.score'] = [sentiment_res['sentiment.score']]

            entity['num.sentences'] = len(entity['sentiment.score'])
            entity['sentiment.score'] = np.mean(entity['sentiment.score'])

        lemmas_res = []
        for lemma in lemmas_dict.keys():
            scores = lemmas_dict[lemma]['sentiment.score']
            lemmas_res.append({
                'text': lemma,
                'num.sentences': len(scores),
                'sentiment.score': np.mean(scores)
            })
    else:
        lemmas_res = lemmas.get_lemmas(text)

    results = {
        'entities': entities_res,
        'sentiment': sentiment_res,
        'keywords': keywords_res,
        'lemmas': lemmas_res
    }

    return results
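
A hedged usage sketch:

results = analyze_text_block("The staff was friendly. The wait was long.")
print(results['sentiment'])  # document-level sentiment
print(results['keywords'])   # keywords with sentiment.score averaged per sentence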