Ejemplo n.º 1
0
def simi(path_file_1, path_file_2):
    """Find the profile file in *path_file_2* most similar to *path_file_1*.

    A TF-IDF model is fitted on every other profile CSV in the folder;
    the filename whose content has the highest cosine similarity to the
    reference profile's content is returned.

    Args:
        path_file_1: Full path of the reference profile file.
        path_file_2: Folder to scan for candidate profile CSVs.

    Returns:
        The filename (not full path) of the most similar profile.
    """
    reference_docs = []   # content of the file matching path_file_1
    candidate_docs = []   # content of every other candidate file
    candidate_names = []  # filenames aligned index-for-index with candidate_docs

    for file in glob.glob1(path_file_2, '*' + g_.gv_format_csv):
        full_path = g_.gc_folder_path + g_.gv_profile_path + file
        if file == g_.gv_profile_config + g_.gv_format_csv:
            # The master config file is not a profile — skip it.
            print(g_.gv_space)
        elif full_path == path_file_1:
            reference_docs.append(db_.read_file(full_path, g_.gv_null))
        else:
            candidate_names.append(file)
            candidate_docs.append(db_.read_file(full_path, g_.gv_null))

    vectorizer = TfidfVectorizer(decode_error='ignore',
                                 strip_accents='unicode',
                                 stop_words='english',
                                 ngram_range=(1, 1))
    vectorizer.fit(candidate_docs)
    reference_vecs = vectorizer.transform(reference_docs).toarray()
    candidate_vecs = vectorizer.transform(candidate_docs).toarray()

    # similarity is a (1 x n) matrix, so the flat argmax is the index of
    # the best-matching candidate.
    similarity = cosine_similarity(reference_vecs, candidate_vecs)
    return candidate_names[np.argmax(similarity)]
Ejemplo n.º 2
0
def basic_operations_on_query(query):
    """Normalize a user query: strip special characters, apply domain
    synonyms, and remove stop words.

    Args:
        query: Raw user input string.

    Returns:
        The cleaned query as single-space-separated tokens.
    """
    # Keep only alphanumerics; every other run of characters becomes one space.
    query = re.sub(r"[^a-zA-Z0-9]+", ' ', query)
    query = query.strip()

    # Replace users' words with official domain terms where a synonym
    # mapping exists (lines of the form "user_term,official_term").
    domain_synonym_corpus_list = db_.read_file(
        g_.gc_folder_path + g_.gv_f_domain_syn, g_.gc_newline)
    for line in domain_synonym_corpus_list:
        line_str = line.split(g_.gv_comma)
        # NOTE(review): substring match — "cat" would also fire inside
        # "catalog"; kept as-is to preserve the existing replacement behavior.
        if line_str[0] in query:
            query = query.replace(line_str[0], line_str[1] + ' ')

    # Remove stop words. Fix: the old code replaced " word " with " ",
    # which silently kept stop words at the start and end of the query
    # (no surrounding spaces there). Filtering the token list removes
    # them in every position.
    stop_words = db_.read_file(g_.gc_folder_path + g_.gv_f_stop_words,
                               g_.gc_newline)
    stop_word_set = set(stop_words)  # O(1) membership per token
    kept_words = [word for word in query.split(g_.gv_space)
                  if word not in stop_word_set]
    query = g_.gv_space.join(kept_words)

    # Collapse any residual runs of whitespace into single spaces.
    query = g_.gv_space.join(query.split())
    return query
Ejemplo n.º 3
0
def user_exists(username):
    """Check the master profile config for *username*.

    Args:
        username: The user id to look up (first CSV column of each row).

    Returns:
        0 if the user exists, 1 otherwise (shell-style convention kept
        for existing callers).
    """
    config_path = (g_.gc_folder_path + g_.gv_profile_path +
                   g_.gv_profile_config + g_.gv_format_csv)
    config_lines = db_.read_file(config_path, g_.gc_newline)

    found = any(line.split(g_.gv_comma)[0] == username
                for line in config_lines)
    return 0 if found else 1
Ejemplo n.º 4
0
def get_text_elements(gv_language):
    """Load the UI text elements for *gv_language* (falls back to English).

    As a side effect, updates the global yes/no answer words
    (g_.gv_yes / g_.gv_no) to their localized forms.

    Args:
        gv_language: Requested language code; anything other than
            English or German is coerced to English.

    Returns:
        A list of "key,text" strings for the selected language (empty
        when the text-elements dump equals g_.gv_null).
    """
    if gv_language != g_.gv_english and gv_language != g_.gv_german:
        gv_language = g_.gv_english

    gl_texts_dump = db_.read_file(g_.gc_folder_path + g_.gv_text_elements,
                                  g_.gc_newline)  # DB
    gl_texts = []

    if gl_texts_dump != g_.gv_null:
        for text_element in gl_texts_dump:
            elements = text_element.split(g_.gv_comma)
            # Fix: guard short/malformed rows — the old code raised
            # IndexError on any line with fewer than four columns.
            if len(elements) < 4 or elements[1] != gv_language:
                continue
            # NOTE(review): the constants use different prefixes
            # (g_.gv_yes_en vs g_.gc_no_en) — verify both exist as named.
            if elements[2] == g_.gv_yes_en:
                g_.gv_yes = elements[3]
            if elements[2] == g_.gc_no_en:
                g_.gv_no = elements[3]
            gl_texts.append(elements[2] + g_.gv_comma + elements[3])
            # (removed dead `elements.clear()` — the name is rebound
            # on every iteration, so clearing it had no effect)
    return gl_texts
Ejemplo n.º 5
0
def get_label(query):
    """Classify *query* with a Naive Bayes model trained on the stored
    question corpus.

    Args:
        query: The (already cleaned) user query string.

    Returns:
        The predicted label for the query.
    """
    # Bi-/tri-gram bag-of-words features.
    vectorizer = CountVectorizer(decode_error='ignore',
                                 strip_accents='unicode',
                                 stop_words='english',
                                 ngram_range=(2, 3))
    list_of_queries = []
    list_of_labels = []

    one_question = db_.read_file(g_.gc_folder_path + g_.gv_f_queries,
                                 g_.gc_newline)

    for question in one_question:
        line_format = question.split(g_.gv_comma)
        try:
            # Column 1 holds the question text, column 2 its label.
            question_text = line_format[1]
            question_label = line_format[2]
        except IndexError:
            # Malformed line (too few columns): report and skip it.
            # Fix: the old handler referenced the undefined name
            # `lv_null`, so it raised NameError whenever it fired.
            ui_.sys_response(g_.gv_null)
        else:
            # Append both together so the two lists can never desync
            # (the old code appended the query before the label lookup
            # could still fail).
            list_of_queries.append(question_text)
            list_of_labels.append(question_label)

    vectorizer.fit(list_of_queries)
    vectorised = vectorizer.transform(list_of_queries).toarray()
    vectorised_test = vectorizer.transform([query]).toarray()

    classifier = MultinomialNB()
    classifier.fit(vectorised, list_of_labels)

    # Fix: predict once (the original called predict twice and discarded
    # the first result).
    label = classifier.predict(vectorised_test)[0]
    return label
Ejemplo n.º 6
0
def make_profile(matr):
    """Interactively create a user profile for id *matr*.

    Asks the profile questions found in the text-elements table (for the
    configured language), writes the answers to a per-user profile CSV,
    appends a summary row to the master profile config, and creates a
    stub chat-record file.

    Args:
        matr: The user's id, used to build the profile/chat file names.

    Returns:
        The same *matr* value that was passed in.
    """
    lv_text_elements_profile = 'profile_elements'
    lv_profile_pref = 'profile_'
    lv_chat_file_pref = 'chat_rec_'
    lv_yes = ',yes'
    # NOTE(review): '******' looks like a masked/redacted literal (likely
    # the username question key) — confirm against the original source.
    lv_q_username = '******'
    lv_q_chat_rec = 'chat_rec'
    lv_q_help_others = 'help_oth'
    lv_q_allow_chat = 'allow_chat'

    all_rel_questions = []
    profile_inf = []

    # Getting the Questions from the table.
    all_questions = db_.read_file(g_.gc_folder_path + g_.gv_text_elements,
                                  g_.gc_newline)
    all_questions.sort()

    # Finding the Questions to ask: keep rows tagged as profile elements
    # in the configured language, stored as "key,question-text".
    for sentence in all_questions:
        sentence_str = sentence.split(g_.gv_comma)
        if sentence_str[0] == lv_text_elements_profile and sentence_str[
                1] == g_.gv_language:
            all_rel_questions.append(sentence_str[2] + g_.gv_comma +
                                     sentence_str[3])

    # Creates the file for the profile: one "field,value" line per answer,
    # prompting the user for each field via the UI layer.
    for q in all_rel_questions:
        question_elements = q.split(g_.gv_comma)
        field_name = question_elements[0]
        field_val = ui_.user_input(field_name)
        db_.write_file(
            g_.gc_folder_path + g_.gv_profile_path + lv_profile_pref + matr +
            g_.gv_format_csv,
            field_name + g_.gv_comma + field_val + g_.gc_newline)
        profile_inf.append(field_name + g_.gv_comma + field_val)

    # Update Master Profile Config: assemble one comma-separated summary
    # row from the answers; ",yes" literals pad the remaining columns.
    update_info_profile_config = ''
    for profile_element in profile_inf:
        profile_element_arr = profile_element.split(',')
        if profile_element_arr[0] == lv_q_username:
            update_info_profile_config = update_info_profile_config + g_.gv_comma + profile_element_arr[
                1]  # Matr
            update_info_profile_config = update_info_profile_config + lv_yes  # ProfileActive: Yes
        elif profile_element_arr[0] == lv_q_chat_rec:
            update_info_profile_config = update_info_profile_config + g_.gv_comma + profile_element_arr[
                1]
        elif profile_element_arr[0] == lv_q_help_others:
            update_info_profile_config = update_info_profile_config + g_.gv_comma + profile_element_arr[1] \
                                     + lv_yes + lv_yes + lv_yes
        elif profile_element_arr[0] == lv_q_allow_chat:
            update_info_profile_config = update_info_profile_config + g_.gv_comma + \
                                     profile_element_arr[1] + lv_yes + lv_yes + lv_yes + lv_yes
        print(update_info_profile_config)  # debug trace of the row so far
    db_.write_file(
        g_.gc_folder_path + g_.gv_profile_path + g_.gv_profile_config +
        g_.gv_format_csv, g_.gc_newline + update_info_profile_config)

    # Create the user's (initially stub) chat-record file.
    db_.write_file(
        g_.gc_folder_path + g_.gv_chat_path + lv_chat_file_pref + matr +
        g_.gv_format_csv, 'chat record')
    return matr