def translate_song(self, song_dict):
    """Build a result dict for one song from its raw tag mapping.

    The result always carries ``SongID`` (taken from ``song_dict['file']``).
    ``SongName``, ``ArtistName`` and ``AlbumName`` are filled from the
    ``title``, ``artist`` and ``album`` tags respectively, each passed
    through ``util.asciify``; a tag that is absent from ``song_dict`` is
    simply left out of the result.

    Raises:
        KeyError: if ``song_dict`` has no ``'file'`` entry.
    """
    result = dict(SongID=song_dict['file'])
    tag_for_key = dict(SongName='title', ArtistName='artist', AlbumName='album')
    for key, tag in tag_for_key.items():
        # Only the lookup is guarded: a missing tag is expected (this song
        # does not have that key, probably a .wav), whereas any other
        # failure is a real error and must not be silently swallowed.
        try:
            raw_value = song_dict[tag]
        except KeyError:
            continue
        result[key] = util.asciify(raw_value)
    return result
def bigram_feats(md):
    """
    arguments:
      md is a util.MovieData object
    returns:
      a Counter mapping each (word, next_word) pair found in the reviews
      to the number of times it occurs on this util.MovieData object
    """
    counts = Counter()
    for reviewer in util.MovieData.reviewers:
        if not hasattr(md, reviewer):
            continue
        # Normalise the review text: strip, lowercase, asciify, then
        # delete everything util.punct_patt matches before splitting
        # on whitespace.
        text = util.asciify(md.__dict__[reviewer].strip().lower())
        tokens = util.punct_patt.sub("", text).split()
        # Keep only the tokens util.non_numeric accepts; bigrams are formed
        # over what remains, so pairs can bridge a dropped token.
        words = [tok for tok in tokens if util.non_numeric(tok)]
        # Pair each word with its successor within the same review.
        counts.update(zip(words, words[1:]))
    return counts
def revLens(md):
    """
    arguments:
      md is a util.MovieData object
    returns:
      dictionary mapping "<reviewer>_length" to the number of
      whitespace-separated words in that reviewer's review, for every
      reviewer attribute present on md
    """
    lengths = {}
    for reviewer in util.MovieData.reviewers:
        if not hasattr(md, reviewer):
            continue
        # Same normalisation as the other feature extractors: strip,
        # lowercase, asciify, then delete everything util.punct_patt matches.
        text = util.asciify(md.__dict__[reviewer].strip().lower())
        words = util.punct_patt.sub("", text).split()
        lengths[reviewer + "_length"] = len(words)
    return lengths
def unigram_feats(md):
    """
    arguments:
      md is a util.MovieData object
    returns:
      a Counter mapping each unigram found in the reviews to the number
      of times it occurs on this util.MovieData object
    """
    counts = Counter()
    for reviewer in util.MovieData.reviewers:
        if not hasattr(md, reviewer):
            continue
        # Normalise the review text: strip, lowercase, asciify, then
        # delete everything util.punct_patt matches before splitting
        # on whitespace.
        text = util.asciify(md.__dict__[reviewer].strip().lower())
        tokens = util.punct_patt.sub("", text).split()
        # Count only the tokens util.non_numeric accepts.
        counts.update(tok for tok in tokens if util.non_numeric(tok))
    return counts