def read_csv(input_file):
    """Load ``[labels, content]`` rows from a delimited text file.

    The file is parsed by ``load_csv`` with the first row as the header, and
    every missing cell is replaced with the placeholder ``'|'``.

    Args:
        input_file: Location of the file; passed unchanged to ``load_csv``.

    Returns:
        A list of two-element lists ``[labels, content]``, in the order of
        the loaded rows. ``labels`` holds the row's values from the
        ``hp.label_vocabulary`` columns and ``content`` is the string from
        the ``content`` column. Rows whose content is not a string are
        omitted.
    """
    # NOTE(review): load_csv and hp are defined outside this block; load_csv
    # presumably returns a pandas DataFrame -- confirm against its definition.
    df = load_csv(input_file, header=0).fillna('|')
    contents = df['content'].tolist()
    labels = df.loc[:, hp.label_vocabulary].values

    # Missing content was already replaced by '|' above, so this filter only
    # drops rows whose content is a non-string value (e.g. a number).
    return [
        [label, content]
        for label, content in zip(labels, contents)
        if isinstance(content, str)
    ]
def _read_csv(cls, input_file):
    """Load ``[labels, content]`` rows from a delimited text file, shuffled.

    The file is parsed by ``load_csv`` with the first row as the header, and
    every missing cell is replaced with the placeholder ``'|'``. The
    resulting rows are shuffled in place with the module-level ``random``
    generator, and a short summary is printed to stdout.

    Args:
        cls: Not used by the body.
        input_file: Location of the file; passed unchanged to ``load_csv``.

    Returns:
        A shuffled list of two-element lists ``[labels, content]``.
        ``labels`` holds the row's values from the ``hp.label_vocabulary``
        columns and ``content`` is the string from the ``content`` column.
        Rows whose content is not a string are omitted.
    """
    # NOTE(review): load_csv and hp are defined outside this block; load_csv
    # presumably returns a pandas DataFrame -- confirm against its definition.
    df = load_csv(input_file, header=0).fillna('|')
    contents = df['content'].tolist()
    labels = df.loc[:, hp.label_vocabulary].values

    # Missing content was already replaced by '|' above, so this filter only
    # drops rows whose content is a non-string value (e.g. a number).
    lines = [
        [label, content]
        for label, content in zip(labels, contents)
        if isinstance(content, str)
    ]

    # Order depends on the global random state; seed `random` beforehand if
    # a reproducible order is needed.
    random.shuffle(lines)

    print('Read csv finished!(1)')
    print('Head data:', lines[:5])
    print('Length of data:', len(lines))
    return lines