def posting_generator():
    """Crawl CareerBuilder search results and yield one Watson NLU analysis per posting."""
    jobs = []
    # job_title = "software engineer"
    # job_location = "Pittsburgh, PA"
    search_url = ('https://www.careerbuilder.com/jobs-software-engineer-in-pittsburgh,pa'
                  '?keywords=Software+Engineer&location=Pittsburgh%2C+PA')
    base_url = 'https://www.careerbuilder.com'
    next_page = urllib.request.urlopen(search_url, None, None)
    # The SDK keyword is `iam_apikey`; the original `_apikey` is not a
    # parameter the NLU constructor accepts.
    nlu = NLU(iam_apikey='BU11gy3frJMRMKz4XQ_sPJ_HGF3p-qEr74xUlEVTWvsY',
              version='2018-03-19')

    def nextPage(soup):
        # Locate the pagination link; return its absolute URL, or 0 on the last page.
        print("BREAK 1")
        next_link = soup.find("a", class_="Next Page")
        if next_link is not None:
            print("BREAK 2")
            # next_link is already the <a> tag, so read its href directly;
            # the old find_parent("a") lookup on an <a> always returned None.
            return urllib.parse.urljoin(base_url, next_link['href'])
        else:
            print("BREAK 3")
            return 0

    while True:
        soup = BeautifulSoup(next_page, 'html.parser')
        for job in soup.find_all('h2'):
            # BeautifulSoup exposes the class attribute as a list of class names,
            # so compare against the list rather than the raw attribute string.
            if job.get('class') == ['job-title', 'show-for-medium-up']:
                url = base_url + job.a['href']
                response = nlu.analyze(
                    url=url,
                    features=Features(
                        entities=EntitiesOptions(limit=1000),
                        keywords=KeywordsOptions(limit=1000),
                    )).get_result()
                jobs.append(response)
                # jsonprinter(response)
                yield response
        next_url = nextPage(soup)
        if next_url == 0:
            break
        else:
            next_page = urllib.request.urlopen(next_url, None, None)
    print("END OF PROGRAM!")
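
# A minimal usage sketch for the generator above. It assumes each yielded
# response follows the standard Watson NLU v1 payload, with top-level
# 'keywords' and 'entities' lists; the five-posting cap is just illustration.
if __name__ == '__main__':
    for i, posting in enumerate(posting_generator()):
        top_keywords = [kw['text'] for kw in posting.get('keywords', [])[:5]]
        print('Posting %d top keywords: %s' % (i + 1, ', '.join(top_keywords)))
        if i >= 4:
            break  # stop early to stay inside API rate limits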
def main():
    # Pull Watson NLU credentials from a local .env file.
    load_dotenv(find_dotenv())
    nlu_username = os.environ.get('NLU_USERNAME')
    nlu_password = os.environ.get('NLU_PASSWORD')
    nlu = NLU(username=nlu_username, password=nlu_password, version='2017-02-27')
    # This 2017-era SDK returns a plain dict, so index straight into the result.
    result = nlu.analyze(text='I hate galvanize',
                         features=[features.Sentiment()])['sentiment']['document']
    print(result['label'], result['score'])
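
# load_dotenv(find_dotenv()) expects a .env file somewhere on the search path.
# A minimal example of its contents (placeholder values, not real credentials):
#
#   NLU_USERNAME=your-nlu-username
#   NLU_PASSWORD=your-nlu-password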
def __init__(self):
    self.MIN_ANSWER_LEN = 5
    self.MIN_CONFIDENCE = 0.60
    # Canned replies used to keep the interview conversation flowing.
    self.SMALL_TALK = ['I see.', 'Got it.', 'Ok', 'Interesting']
    self.POSITIVE_REMARK = [
        "Good.", "Excellent!", "Sounds great!", "That's awesome!", "Wonderful!"
    ]
    self.NEGATIVE_REMARK = [
        "I'm sad to hear that.", "That doesn't sound very good.",
        "I'm sad to hear that.", "ah",
        "Someone forgot to have their coffee today"
    ]
    self.questions = [
        'Tell me about yourself',
        'Tell me about a recent project that you worked on',
        'What are your greatest weaknesses?',
        'What did you dislike the most about your last job?',
        'If you were an animal, which one would you want to be?',
        'What are your hobbies?',
        'What is your greatest professional achievement?',
        'Why do you want to work here?',
        'What are your strengths?',
        'Where do you see yourself in five years?',
        'What type of work environment do you prefer?',
        "What's a time you disagreed with a decision that was made at work?",
        'Why was there a gap in your employment?',
        'Can you explain why you changed career paths?',
        'How do you deal with pressure or stressful situations?',
        'What would your first 30, 60, or 90 days look like in this role?',
        'What are your salary requirements?',
        'How many tennis balls can you fit into a limousine?',
        'Are you planning on having children?',
        'How many ping pong balls fit on a 737?',
        'Describe a difficult work situation / project and how you overcame it',
        'How are you different from the competition?',
        'Do you take work home with you?',
        'How do you view yourself? Whom do you compare yourself to?',
        'What motivates you?',
        'What did you like most about your last job?',
        'What did you dislike most about your last job?',
        'Why should I take a risk on you?'
    ]
    # Watson service clients: speech synthesis, transcription, and text analysis.
    self.text_to_speech = TextToSpeechV1(
        x_watson_learning_opt_out=True)  # Optional flag
    self.speech_to_text = SpeechToTextV1(x_watson_learning_opt_out=False)
    self.nlu = NLU(version='2017-02-27')
    self.TEMPFILE = './temp/output.wav'
    self.answers, self.sentiments = [], []
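
# A small driver sketch for the interviewer class above. The enclosing class
# name is not shown in this fragment, so the `bot` argument is assumed to be an
# instance of it; random.choice mirrors how the canned reply lists are shaped.
import random

def demo_interview(bot):
    question = random.choice(bot.questions)
    print('Q:', question)
    answer = input('A: ')
    if len(answer.split()) < bot.MIN_ANSWER_LEN:
        print(random.choice(bot.SMALL_TALK))  # too short to judge sentiment
    else:
        print(random.choice(bot.POSITIVE_REMARK))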
def posting_generator(job_title, job_location):
    current_process = os.getpid()  # os.getpid() returns the process id, not a thread id
    print("posting generator")
    print('Current process:', current_process)
    jobs = []  # accumulates the raw NLU responses

    # Build the Indeed search URL from the query parameters.
    searchValues = {'q': job_title, 'l': job_location}
    search_url = base_url + urllib.parse.urlencode(searchValues)

    # Fetch the first results page.
    next_page = urllib.request.urlopen(search_url, None, None)
    nlu = NLU(iam_apikey='BU11gy3frJMRMKz4XQ_sPJ_HGF3p-qEr74xUlEVTWvsY',
              version='2018-03-19')

    while True:
        # Parse the fetched page so individual postings can be located.
        soup = BeautifulSoup(next_page, 'html.parser')
        # Each organic posting on Indeed is a <div data-tn-component="organicJob">;
        # the comparison below also covers the old `is not None` check, since
        # None never equals 'organicJob'.
        for job in soup.find_all('div'):
            if job.get('data-tn-component') == 'organicJob':
                url = 'https://www.indeed.com' + job.a['href']
                response = nlu.analyze(
                    url=url,
                    features=Features(
                        entities=EntitiesOptions(limit=LIMIT),
                        keywords=KeywordsOptions(limit=LIMIT),
                    )).get_result()
                jobs.append(response)
                # jsonprinter(response)
                # yield url, response
                saveHTML(url)
        next_url = nextPage(soup)
        if next_url == 0:
            break
        else:
            next_page = urllib.request.urlopen(next_url, None, None)
    print("END OF PROGRAM!")
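
# The generator above leans on module-level names this fragment never defines
# (base_url, LIMIT, nextPage, saveHTML). A plausible sketch of them follows;
# everything here is an assumption reconstructed from how the names are used,
# not the project's actual code.
import hashlib
import urllib.request

base_url = 'https://www.indeed.com/jobs?'  # assumed: urlencode() output is appended directly
LIMIT = 1000                               # assumed: mirrors the limits used elsewhere here

def nextPage(soup):
    # Assumed pagination helper: look for a "Next" link and return its
    # absolute URL, or 0 when there are no more result pages.
    link = soup.find('a', attrs={'aria-label': 'Next'})
    return 'https://www.indeed.com' + link['href'] if link else 0

def saveHTML(url):
    # Assumed archiver: write the raw posting HTML to a file named by URL hash.
    with urllib.request.urlopen(url) as page:
        fname = hashlib.md5(url.encode()).hexdigest() + '.html'
        with open(fname, 'wb') as f:
            f.write(page.read())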
def get_letter(filename):
    # Tests the resume parser class: extract the cover letter text, then run
    # Watson NLU entity and keyword extraction over it.
    resume = CoverLetterParser(filename)
    text = resume.parse()
    nlu = NLU(iam_apikey='BU11gy3frJMRMKz4XQ_sPJ_HGF3p-qEr74xUlEVTWvsY',
              version='2018-03-19')
    response = nlu.analyze(
        language='en',
        text=text,
        features=Features(
            entities=EntitiesOptions(limit=1000),
            keywords=KeywordsOptions(limit=1000),
        )).get_result()
    # prints the text analysis from Watson NLU
    # jsonprinter(response)
    return response
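
# A short usage sketch for get_letter(). The response shape ('keywords' and
# 'entities' lists whose items carry 'text' and 'relevance' fields) is the
# standard Watson NLU v1 payload; 'cover_letter.pdf' is a placeholder filename.
if __name__ == '__main__':
    analysis = get_letter('cover_letter.pdf')
    for kw in analysis.get('keywords', [])[:10]:
        print('%-30s relevance=%.2f' % (kw['text'], kw['relevance']))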
import json
import os

import twitter

from moodmarbles.twitter.base import ACCESS_SECRET   # assumed: these constants are
from moodmarbles.twitter.base import ACCESS_TOKEN    # used unqualified below, so they
from moodmarbles.twitter.base import CONSUMER_KEY    # presumably live alongside
from moodmarbles.twitter.base import CONSUMER_SECRET  # USERNAME and PASSWORD
from moodmarbles.twitter.base import PASSWORD
from moodmarbles.twitter.base import USERNAME
from watson_developer_cloud import NaturalLanguageUnderstandingV1 as NLU
from watson_developer_cloud.natural_language_understanding_v1 import EmotionOptions
from watson_developer_cloud.natural_language_understanding_v1 import Features

# Construct the twitter API client
# from os set environment variables
_API = twitter.Api(consumer_key=os.environ[CONSUMER_KEY],
                   consumer_secret=os.environ[CONSUMER_SECRET],
                   access_token_key=os.environ[ACCESS_TOKEN],
                   access_token_secret=os.environ[ACCESS_SECRET])

# Construct the watson NLU unit
_NLU = NLU(version='2018-03-14',
           username=os.environ[USERNAME],
           password=os.environ[PASSWORD])


# Get tweets with a certain hashtag
def get_tweets_with_hashtag(hashtag, count):
    # If there is already a file for those tweets,
    # use the cached version
    if os.path.exists('%s.json' % hashtag):
        data = json.load(open('%s.json' % hashtag, 'r'))
        data = data[:count] if len(data) > count else data
        return data
    # Otherwise pull from twitter
    tweets = _API.GetSearch(term=hashtag, include_entities=True, count=count)
    analyses = []
    for tweet in tweets:
        try:
            # Get the sentiment of the tweet
            # (the fragment ends here; the lines below are an assumed completion
            # that runs NLU emotion analysis, matching the EmotionOptions import;
            # the cache-write step is not shown in the fragment and is omitted)
            result = _NLU.analyze(text=tweet.text,
                                  features=Features(emotion=EmotionOptions()))
            analyses.append(result)
        except Exception:
            # Skip tweets Watson cannot analyze (e.g. an unsupported language)
            continue
    return analyses
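
# A hedged usage sketch: fetch and analyze a handful of tweets. The hashtag
# and count are placeholders, and the Twitter/Watson credentials must already
# be exported under the environment variable names the module reads.
if __name__ == '__main__':
    results = get_tweets_with_hashtag('#python', count=10)
    print('Analyzed %d tweets' % len(results))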
print("Imprimindo a extração URL :{}".format(url)) ''' #Concatenar jsonString a = json.loads(url) b = json.loads(version) c = json.loads(apikey) d = json.loads(model_id) ab = dict(a.items() + b.items()) cd = dict(c.items() + d.items()) jsonMerged = dict.items() print(json.dumps(a, indent=2)) ''' natural_language = NLU(version=version,iam_apikey=apikey,url=url) ''' def _ler_pdf_file(ler_pdf): pdf_file = ler_pdf ler_pdf = PyPDF2.PdfFileReader(pdf_file) conteudo = '' for x in range(ler_pdf.getNumPages()): pagina = ler_pdf.getPage(x) #print("Página Numero: {}".format(str(1+ler_pdf.getPageNumber(pagina)))) conteudo = pagina.extractText() return conteudo # Chamando a função para extrair de pdf para txt pdf_file = _ler_pdf_file('Decisao_Judicial_Med.pdf')