Example #1
0
def what_words_am_i_missing(cv, job_ids):
    """Print and return the scored job-description words not found in ``cv``.

    The documents for ``job_ids`` are fetched with ``get_docs_by_job_id`` and
    handed to ``manual_tfidf.new_run_tfidf``. Every ``(word, freq)`` pair it
    yields is kept when ``word in cv`` is false.

    Args:
        cv: Object the words are tested against with the ``in`` operator.
            NOTE(review): if callers pass a plain string this is a substring
            test rather than a whole-word test -- confirm the intended type.
        job_ids: Forwarded unchanged to ``get_docs_by_job_id``.

    Returns:
        A list of ``(word, freq)`` tuples, in the order produced by
        ``manual_tfidf.new_run_tfidf``. The same list is also printed, as
        the function originally only printed it.
    """
    job_descriptions = get_docs_by_job_id(job_ids)
    important_words_freqs = manual_tfidf.new_run_tfidf(job_descriptions)
    important_words = [
        (word, freq)
        for (word, freq) in important_words_freqs
        if word not in cv
    ]
    # Single-argument call form: prints identically under the Python 2 print
    # statement and is also valid Python 3 syntax.
    print(important_words)
    return important_words
Example #2
0
# -*- coding: utf-8 -*-

import MySQLdb as mdb
import sys
import argparse
import re
from argparse import ArgumentParser
import os.path
import manual_tfidf

## Get all the jobs out of the database
# The credentials file holds a single "username:password" line. Split on the
# first colon only so that a password containing ':' stays intact, and close
# the file before the database round-trip starts.
with open('user_password.txt') as f:
    username, password = f.readline().strip().split(':', 1)

con = mdb.connect('10.100.95.207', username, password, 'test')
try:
    cur = con.cursor()
    all_descriptions_sql = "SELECT search_id, job_title, job_description from test.job_results LIMIT 10"
    cur.execute(all_descriptions_sql)
    id_title_description = [[entry[0], entry[1], entry[2]] for entry in cur.fetchall()]
finally:
    # Read-only query: nothing to commit, but always release the connection.
    con.close()

## Normalise the descriptions: decode, lower-case, turn commas into spaces
all_descriptions = []
for entry in id_title_description:
    raw_description = entry[2]
    if raw_description is None:
        # A NULL job_description has no text to analyse.
        continue
    description = raw_description.strip()
    if isinstance(description, bytes):
        # Byte strings are decoded once here; undecodable bytes are dropped.
        description = description.decode('utf-8', 'ignore')
    # Descriptions of 25 characters or fewer are discarded.
    if len(description) > 25:
        all_descriptions.append(description.lower().replace(',', ' ').strip())

## Rebuild each description from its cleaned words, dropping any cleaned
## word that is shorter than two characters
new_descriptions = []
better_descriptions = [thing.split(' ') for thing in all_descriptions]
for words in better_descriptions:
    cleaned_words = [
        word.strip().replace(',', ' ').replace('\t', ' ').strip()
        for word in words
    ]
    new_document = ' '.join([word for word in cleaned_words if len(word) > 1])
    new_descriptions.append(new_document)


manual_tfidf.new_run_tfidf(new_descriptions)