def url_to_sentiment(url):
    """Download the article at ``url`` and return its average sentiment.

    The article text is split into lines and each line is scored with
    ``SentimentIntensityAnalyzer.polarity_scores``. Lines whose compound
    score is exactly 0.0 (no detectable sentiment, including blank lines)
    are left out, and the remaining scores are averaged per category.

    Args:
        url: Address of the article to download and analyse.

    Returns:
        A list ``[pos, neu, neg, compound]`` with the mean of each score
        over the lines that were kept. The values are not rounded.

    Raises:
        Whatever ``stat.mean`` raises for empty data when no line has a
        non-zero compound score (``statistics.StatisticsError`` if ``stat``
        is the standard ``statistics`` module -- confirm against the
        file's imports). Download/parse errors from ``newspaper``
        propagate unchanged.
    """
    from newspaper import Article

    article = Article(url)
    article.download()
    article.parse()

    analyzer = SentimentIntensityAnalyzer()
    lines = str(article.text).splitlines()

    # Read the score dicts directly. Round-tripping them through str() and
    # fixed-width slicing would truncate every value to two decimals and
    # depends on the exact textual layout of the dict.
    line_scores = [analyzer.polarity_scores(line) for line in lines]

    # A compound score of 0.0 marks a line with no sentiment signal; such
    # lines would only drag the averages towards neutral.
    scored = [s for s in line_scores if s["compound"] != 0.0]

    neg = stat.mean([s["neg"] for s in scored])
    neu = stat.mean([s["neu"] for s in scored])
    pos = stat.mean([s["pos"] for s in scored])
    comp = stat.mean([s["compound"] for s in scored])

    # Callers receive the scores in positive, neutral, negative, compound order.
    return [pos, neu, neg, comp]