# Example 1
def go():
    """Read and strip the input file, flatten it two levels, dedupe, and append to outfile.

    Returns the deduplicated flat list that was written.
    """
    nested = readFileAndStrip(file, 1)
    # collapse two levels of nesting in a single comprehension
    flat = [leaf for level1 in nested for level2 in level1 for leaf in level2]
    unique = f5(flat)  # f5 presumably dedupes -- confirm against its definition
    nelsonlinewrite(unique, outfile, "a")
    return unique
# Example 2
def go():
    """Flatten the stripped file contents two levels deep, keep unique entries, append to outfile.

    Returns the deduplicated list.
    """
    flattened = []
    for outer in readFileAndStrip(file, 1):
        for inner in outer:
            # inner is itself iterable; extend pulls out its leaves
            flattened.extend(inner)
    deduped = f5(flattened)  # f5 presumably removes duplicates -- confirm
    nelsonlinewrite(deduped, outfile, "a")
    return deduped
# Example 3
FILE.close()

# For every track get count of words that match keywords
# ------------------------------------------------------

word = []  # output row buffer; rebuilt from scratch on every loop iteration

for track in tracks:
    word = []  #Clear word buffer
    #read in raw string
    # NOTE(review): urllib.urlopen is Python 2 only (urllib.request.urlopen in Py3)
    raw = urllib.urlopen(track).read()
    ##EDITS BY NELSON: take apart, remove artist etc, put back together with spaces
    lines = ' '.join(inforemove(raw.split('\n'), infobuzzwords, 10))
    #break up string into words and punctuations
    tokens = nltk.word_tokenize(lines)
    #convert to text for processing with nltk
    text = nltk.Text(tokens)
    #get frequency distribution
    fdist = nltk.FreqDist(text)
    #get distinct items from text
    # NOTE(review): presumably relies on old-nltk FreqDist.keys() returning
    # samples sorted by decreasing frequency; modern nltk does not -- confirm
    vocab = fdist.keys()
    word.extend([track])  # first column of the output row is the track itself
    #remove stopwords from vocab list
    content = [w for w in vocab if w.lower() not in stopwords]
    word.extend(content[:20])  # keep the first 20 remaining tokens
    #write out top 20 words
    nelsonlinewrite(list(chain(word)), outfile, "a")
    #top_20_words.writerow(word)

print('Huzza!')
FILE.close()

# For every track get count of words that match keywords
# ------------------------------------------------------

word = []

for track in tracks:
    word = []  # fresh output row for this track
    # fetch the raw page body (Python 2 urllib)
    raw = urllib.urlopen(track).read()
    # strip artist/info boilerplate lines, then rejoin with spaces
    lines = " ".join(inforemove(raw.split("\n"), infobuzzwords, 10))
    # tokenize and wrap for nltk processing
    tokens = nltk.word_tokenize(lines)
    text = nltk.Text(tokens)
    # frequency distribution over all tokens
    fdist = nltk.FreqDist(text)
    vocab = fdist.keys()  # distinct tokens from the text
    word.append(track)  # the row starts with the track identifier
    # drop stopwords, then keep the first 20 survivors
    content = []
    for w in vocab:
        if w.lower() not in stopwords:
            content.append(w)
    word.extend(content[:20])
    # append the finished row to the output file
    nelsonlinewrite(list(chain(word)), outfile, "a")
    # top_20_words.writerow(word)

print("Huzza!")