def count_with_negation(fin_dict, transcript):
    """
    Tally positive and negative dictionary words found in a transcript.

    The transcript is lower-cased and split into word tokens (plain words,
    "...n't" contractions and "...'s" forms). Every token listed under
    fin_dict['Negative'] counts as negative. Every token listed under
    fin_dict['Positive'] counts as positive, unless one of the up-to-three
    tokens immediately before it is flagged by negated(); in that case it is
    counted as negative and recorded with a ' (with negation)' suffix.
    Negation is only checked for positive words. The two dictionary checks
    are independent, so a token present in both lists is tallied under both.

    Returns a list:
        [word_count, pos_count, neg_count, pos_words, neg_words]
    """
    tokens = re.findall(r'\b([a-zA-Z]+n\'t|[a-zA-Z]+\'s|[a-zA-Z]+)\b', transcript.lower())

    positives = []
    negatives = []

    for position, token in enumerate(tokens):
        if token in fin_dict['Negative']:
            negatives.append(token)

        if token not in fin_dict['Positive']:
            continue

        # Up to three preceding tokens, examined nearest-first; the window is
        # empty for the very first token, so it can never be negated.
        lookback = tokens[max(0, position - 3):position]
        if any(negated(previous) for previous in reversed(lookback)):
            negatives.append(token + ' (with negation)')
        else:
            positives.append(token)

    # Each count always equals the length of its word list.
    return [len(tokens), len(positives), len(negatives), positives, negatives]
Example #2
0
def no(strg):
    """Return the first number found in *strg*, as a string.

    A number is either a decimal such as ``3.14`` or ``.5`` (optionally
    preceded by ``+`` or ``-``) or a plain run of digits. The sign is only
    captured for the decimal form, so ``"-7"`` yields ``"7"``.

    Raises IndexError when *strg* contains no number.
    """
    numbers = re.findall(r"[-+]?\d*\.\d+|\d+", strg)
    first_number = numbers[0]
    return first_number
Example #3
0
#### Loop for finding the site and email of each resort.
#### For every resort: search for its website, then search for an email address,
#### falling back to the resort's own site and then its "contact us" page.
# NOTE(review): loop_counter is initialised here but is not read or updated in the visible part of the loop.
loop_counter=1
for resort in resorts:
    # looking for the website
    ## first, search for "<resort> website" and parse the results page
    ## (the 10 and 'en' arguments are presumably result count and language -- TODO confirm against fetch_results)
    search_page         =fetch_results(resort +' website',10,'en')
    search_page         =BeautifulSoup(search_page,'lxml')
    # collect the href of the first <a> inside every <div class="r">
    sites_in_results    =search_page.findAll('div',{'class':'r'})
    sites_in_results    =[tag.a['href'] for tag in sites_in_results]
    # pick one of the collected links for this resort (selection logic lives in desired_link)
    resort_site         =desired_link(sites_in_results,resort)

    # looking for email
    ## first, search for "<resort> email" and scan the raw results for email_pattern
    search_page         =fetch_results(resort+' email',10,'en')
    emails_in_results   =re.findall(email_pattern,search_page)
    
    if len(emails_in_results)==0 :
        # Fallback 1: nothing in the search results, so load the resort's own site.
        print()
        driver.get(resort_site)
        sleep(1)
        search_page=driver.page_source
        # NOTE(review): the scan above uses re.findall(email_pattern, ...); this one calls
        # .findall on the page source itself. If page_source is a str this raises
        # AttributeError -- confirm and align with the re.findall form.
        emails_in_results=search_page.findall(email_pattern)
        if(len(emails_in_results)==0):
            # Fallback 2: still nothing, so look for a "contact us" link among all anchors.
            search_page=BeautifulSoup(search_page,'lxml')
            contact_us=search_page.findAll('a')
            contact_us=[tag['href'] for tag in contact_us]
            contact_us=desired_link(contact_us,'contact us')
            driver.get(contact_us)
            # NOTE(review): search_page still holds the soup parsed before driver.get(contact_us);
            # the contact page's source is not re-read before this scan, and .findall is
            # called on the soup object rather than via re.findall -- confirm intent.
            emails_in_results=search_page.findall(email_pattern)
            # The body of the following branch is not present in this excerpt.
            if(len(emails_in_results)==0):
from bs4 import BeautifulSoup
import urllib.request
from bs4 import re

# Fetch today's entrees page, dump its <font> elements, and build a list of
# the text fragments found between tags.

# Use the response as a context manager so the HTTP connection is closed as
# soon as the markup has been read into the soup.
with urllib.request.urlopen(
    'http://services.housing.berkeley.edu/FoodPro/dining/static/todaysentrees.asp'
) as page:
    # NOTE(review): no parser is named, so bs4 picks whichever is installed;
    # naming one explicitly would make results reproducible across machines.
    soup = BeautifulSoup(page)

print(soup)

# Every <font> element on the page.
items = soup.find_all('font')


print('BREAK')

for item in items:
    print(item)

# Text runs made only of word characters/whitespace sitting between a '>' and
# the next '<' in the stringified element list; each match still includes
# those two delimiter characters.
food = re.findall(r'>[\w\s]*<',str(items))
food_list = []

for item in food:
    item = item[1:-1]  # drop the surrounding '>' and '<'
    print(item)
    food_list.append(item)

# Discard the first 12 fragments -- presumably page header text rather than
# menu entries; TODO confirm against the live page layout.
food_list = food_list[12:]
print(food_list)