# ******************* Main *******************

# ________________________ Import Data ________________________
# Both inputs are tab-separated files; only the test file is read with
# pandas' 'python' parser engine.
train = pd.read_csv('train.tsv', sep='\t')
train.head()  # return value is discarded here (preview only)
test = pd.read_csv('test.tsv', sep='\t', engine='python')

# Stack train on top of test so every normalization step below is applied
# to both at once. The number of training rows is recorded alongside.
# NOTE(review): concat is called without ignore_index, so the original row
# labels of both frames are kept and may repeat -- presumably trainSize is
# used later to split by position; confirm against the rest of the file.
combined = pd.concat([train, test])
trainSize = len(train)

# ________________________ Data Normalization ________________________
# Replace missing values, column by column, with a placeholder string.
# NOTE(review): the return value of Normalizer.missingValues is ignored, so
# it is assumed to modify `combined` in place -- confirm in Normalizer.
for _col, _fill in (
    ('brand_name', 'None'),
    ('item_description', 'None'),
    ('category_name', 'missing'),
):
    Normalizer.missingValues(combined, _col, _fill)

# Force brand_name, category_name and item_condition_id to the pandas
# "category" dtype.
for _col in ('brand_name', 'category_name', 'item_condition_id'):
    combined[_col] = combined[_col].astype('category')

# Force shipping and item_description to plain Python strings.
combined['shipping'] = combined['shipping'].astype(str)
combined['item_description'] = combined['item_description'].astype(str)

# Remove punctuation from every item description.
combined['item_description'] = combined['item_description'].apply(
    Normalizer.removePunc
)