def crawl(self, food_detail_df, tagger=None, min_count=3):
    """Crawl a recipe for every food, then extract its frequent nouns.

    The frame is modified in place: the crawled text is written to the
    ``foodRecipe`` column and the stringified noun list to the
    ``foodRecipeNoun`` column. Nothing is returned.

    Args:
        food_detail_df: DataFrame with a ``foodName`` column.
        tagger: Object exposing ``nouns(text) -> list[str]``. Defaults to a
            single ``Hannanum()`` instance shared by all rows.
        min_count: Minimum number of times a noun must occur in one recipe
            to be kept (the original hard-coded threshold was 3).
    """
    # Build the tagger once, and before the slow crawl, so that a broken
    # tagger setup fails fast instead of after all recipes were fetched.
    if tagger is None:
        tagger = Hannanum()
    # Accepts only non-empty words that contain no digit at all.
    non_digit = re.compile(r'^\D*\D$')
    total = len(food_detail_df)

    print('[Recipe Web Crawling Start]')
    for done, label in enumerate(food_detail_df.index, start=1):
        food = food_detail_df.loc[label, 'foodName']
        # NOTE(review): the original read ``self, recipe_finder(food, 2)``,
        # which builds a (self, result) tuple; this assumes ``recipe_finder``
        # is a method of the same object -- confirm against the class.
        recipe = self.recipe_finder(food, 2)
        food_detail_df.loc[label, 'foodRecipe'] = str(recipe)
        if done % 5 == 0:
            # Progress is relative to the frame being processed (the
            # original divided by an undefined ``food_df``).
            print(round(done / total * 100, 2), 'percent Done')
    print('Complete!!')
    print('')

    print('[noun extract start]')
    food_detail_df['foodRecipeNoun'] = ''
    for position, label in enumerate(food_detail_df.index):
        doc = food_detail_df.loc[label, 'foodRecipe']
        nouns = tagger.nouns(doc)
        counts = Counter(nouns)
        # Drop *every* occurrence of a rare noun; ``list.remove`` only
        # removed the first one, so nouns seen twice used to leak through.
        only_word = [
            word for word in nouns
            if counts[word] >= min_count and non_digit.match(word)
        ]
        food_detail_df.loc[label, 'foodRecipeNoun'] = str(only_word)
        if position % 5 == 0:
            print(round(position / total * 100, 2), ' perent done')
    print('Complete')
def NLP(self, food_detail_df, tagger=None, min_count=3):
    """Extract the frequent, digit-free nouns of every recipe text.

    Reads the ``foodRecipe`` column and writes the stringified noun list to
    the ``foodRecipeNoun`` column of the same frame (in place). Nothing is
    returned.

    Args:
        food_detail_df: DataFrame with a ``foodRecipe`` text column.
        tagger: Object exposing ``nouns(text) -> list[str]``. Defaults to a
            single ``Hannanum()`` instance shared by all rows.
        min_count: Minimum number of times a noun must occur in one recipe
            to be kept (the original hard-coded threshold was 3).
    """
    # Build the tagger once instead of once per row.
    if tagger is None:
        tagger = Hannanum()
    # Chat-style filler jamo and literal backslash-n sequences to strip.
    noise_tokens = ('ㅎ', 'ㅋ', 'ㅜㅜ', 'ㅠㅠ', '\\n')
    # Accepts only non-empty words that contain no digit at all.
    non_digit = re.compile(r'^\D*\D$')
    total = len(food_detail_df)

    print('[noun extract start]')
    food_detail_df['foodRecipeNoun'] = ''
    for position, label in enumerate(food_detail_df.index):
        doc = food_detail_df.loc[label, 'foodRecipe']

        # Strings are immutable: the original loop rebound its loop variable
        # and discarded the result, so collect the cleaned words instead.
        nouns = []
        for word in tagger.nouns(doc):
            for token in noise_tokens:
                word = word.replace(token, '')
            nouns.append(word)

        counts = Counter(nouns)
        # Drop *every* occurrence of a rare noun; ``list.remove`` only
        # removed the first one, so nouns seen twice used to leak through.
        # Words emptied by the cleaning step fail the regex and are dropped.
        only_word = [
            word for word in nouns
            if counts[word] >= min_count and non_digit.match(word)
        ]
        food_detail_df.loc[label, 'foodRecipeNoun'] = str(only_word)
        if position % 5 == 0:
            print(round(position / total * 100, 2), ' perent done')
    print('Complete')