def go():
    """Flatten the nested input by two levels, reduce it with ``f5``, and write it out.

    The input comes from ``readFileAndStrip(file, 1)``. The flattened items
    are handed to ``f5`` (the original comment describes its result as the
    "unique data"), and that result is passed to ``nelsonlinewrite`` together
    with ``outfile`` and mode ``"a"``.

    ``file`` and ``outfile`` are not parameters; they are looked up in the
    enclosing scope when the function runs.

    Returns:
        The value produced by ``f5`` for the flattened items.
    """
    nested = readFileAndStrip(file, 1)
    # A single pass strips both nesting levels, in the same order as two
    # successive one-level flattens would.
    flattened = [item for outer in nested for inner in outer for item in inner]
    unique_items = f5(flattened)
    nelsonlinewrite(unique_items, outfile, "a")
    return unique_items
def go():
    """Flatten the data read from ``file`` twice, de-duplicate it, and write it out.

    Steps: read via ``readFileAndStrip(file, 1)``; remove two levels of
    nesting; pass the flat list to ``f5`` (labelled "unique data" by the
    original comment); hand the result to ``nelsonlinewrite`` with
    ``outfile`` and mode ``"a"``.

    ``file`` and ``outfile`` are resolved from the enclosing scope, not
    passed in.

    Returns:
        Whatever ``f5`` returned.
    """
    # First level: concatenate the members of every top-level entry.
    once_flattened = []
    for sublist in readFileAndStrip(file, 1):
        once_flattened.extend(sublist)

    # Second level: concatenate the members of each remaining sublist.
    twice_flattened = []
    for sublist in once_flattened:
        twice_flattened.extend(sublist)

    deduplicated = f5(twice_flattened)
    nelsonlinewrite(deduplicated, outfile, "a")
    return deduplicated
FILE.close()

# For every track, write out the track followed by up to 20 non-stopword
# vocabulary items from its text
# ------------------------------------------------------------------------
# NOTE(review): the original indentation was lost. The loop body is taken to
# run through the nelsonlinewrite() call (the buffer is reset per track), and
# the final print() is assumed to follow the loop -- confirm against the
# original script.
word = []  # keeps the name defined (and empty) even when ``tracks`` is empty
for track in tracks:
    word = []  # Clear word buffer

    # Read in the raw string. The handle is closed explicitly instead of
    # being left for the garbage collector.
    response = urllib.urlopen(track)
    try:
        raw = response.read()
    finally:
        response.close()

    ## EDITS BY NELSON: take apart, remove artist etc, put back together with spaces
    lines = ' '.join(inforemove(raw.split('\n'), infobuzzwords, 10))

    # Break up string into words and punctuation
    tokens = nltk.word_tokenize(lines)
    # Convert to text for processing with nltk
    text = nltk.Text(tokens)
    # Get frequency distribution
    fdist = nltk.FreqDist(text)
    # Get distinct items from text
    # NOTE(review): the "top 20" slice below is only most-frequent-first if
    # fdist.keys() is ordered by frequency -- presumably true for the NLTK
    # version this was written against; confirm.
    vocab = fdist.keys()

    word.extend([track])  # the track itself is the first field of the row

    # Remove stopwords from vocab list (comparison is on the lowercased item)
    content = [w for w in vocab if w.lower() not in stopwords]
    word.extend(content[:20])

    # Write out the row: track plus at most 20 words
    nelsonlinewrite(list(chain(word)), outfile, "a")

print('Huzza!')
FILE.close()

# For every track, write out the track followed by up to 20 non-stopword
# vocabulary items from its text
# ------------------------------------------------------------------------
# NOTE(review): the original indentation was lost. The loop body is taken to
# run through the nelsonlinewrite() call (the buffer is reset per track), and
# the final print() is assumed to follow the loop -- confirm against the
# original script.
word = []  # keeps the name defined (and empty) even when ``tracks`` is empty
for track in tracks:
    word = []  # Clear word buffer

    # Read in the raw string. The handle is closed explicitly instead of
    # being left for the garbage collector.
    response = urllib.urlopen(track)
    try:
        raw = response.read()
    finally:
        response.close()

    ## EDITS BY NELSON: take apart, remove artist etc, put back together with spaces
    lines = " ".join(inforemove(raw.split("\n"), infobuzzwords, 10))

    # Break up string into words and punctuation
    tokens = nltk.word_tokenize(lines)
    # Convert to text for processing with nltk
    text = nltk.Text(tokens)
    # Get frequency distribution
    fdist = nltk.FreqDist(text)
    # Get distinct items from text
    # NOTE(review): the "top 20" slice below is only most-frequent-first if
    # fdist.keys() is ordered by frequency -- presumably true for the NLTK
    # version this was written against; confirm.
    vocab = fdist.keys()

    word.extend([track])  # the track itself is the first field of the row

    # Remove stopwords from vocab list (comparison is on the lowercased item)
    content = [w for w in vocab if w.lower() not in stopwords]
    word.extend(content[:20])

    # Write out the row: track plus at most 20 words
    nelsonlinewrite(list(chain(word)), outfile, "a")

print("Huzza!")