def read_file(self):
     """Read the CSV at ``self.file_path`` and build one record per row.

     Each row becomes a dict holding the user id, service id, formatted
     time, epoch timestamp, activity, cleaned content, keywords and the
     retweet / like / reply counters. The dict is printed and, when
     ``self.flag_insert == "1"``, passed to ``self.input_database``.

     A NaN cell in the "时间" or "内容" column appends the row to
     ``self.nan_list``; the row is still emitted, with that field left at
     its default. The collected NaN rows are printed after the loop.

     Changes compared with the previous revision:
       * 12-hour clock handling: "上午12:xx" now maps to 00:xx and
         "下午12:xx" stays at 12:xx instead of rolling to the next day;
       * the empty-activity test uses ``==`` instead of ``is``.
     """
     data = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     time_layout = "%H:%M - %Y年%m月%d日"
     for i in range(len(data)):
         print("i:", i)
         # Per-row defaults; a column missing from the CSV keeps them.
         user_id = ""
         service_id = ""
         date_time = ""  # formatted as "%Y-%m-%d %H:%M:%S"
         time_stamp = ""  # epoch seconds once a time has been parsed
         activity = ""
         content = ""
         key_words = []  # accumulated results of dpt4.main
         retweet = 0
         like = 0
         reply_num = 0
         row = data.iloc[i]  # fetched once instead of once per cell
         for j in data.columns:
             j = str(j)
             element = str(row[j])
             if j == "用户ID":
                 user_id = element
                 print(j, ":", user_id)
             elif j == "服务ID":
                 service_id = element
                 print(j, ":", service_id)
             elif j == "时间":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 matched = self.match_timestamp(element)
                 if matched:
                     print("10位数字的时间戳")
                     time_stamp = int(matched)
                     # Epoch seconds -> formatted local time string.
                     date_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                               time.localtime(time_stamp))
                     print("date_time:", date_time, ", timestamp:",
                           time_stamp)
                 else:
                     if "上午" in element:
                         element = element.replace("上午", "")
                         print("时间:", element)
                         dt = datetime.datetime.strptime(element, time_layout)
                         if dt.hour == 12:
                             # 12 AM is midnight on a 12-hour clock.
                             dt = dt.replace(hour=0)
                         date_time = str(dt)
                         print("时间-:", date_time)
                         time_stamp = int(time.mktime(dt.timetuple()))
                         print("timestamp:", time_stamp)
                     if "下午" in element:
                         element = element.replace("下午", "")
                         dt = datetime.datetime.strptime(element, time_layout)
                         print("dt:", dt)
                         # PM adds 12 hours, except 12 PM which is noon.
                         if dt.hour < 12:
                             now = dt + timedelta(hours=12)
                         else:
                             now = dt
                         print("new now:", now)
                         date_time = str(now)
                         time_stamp = int(time.mktime(now.timetuple()))
                         print("timestamp:", time_stamp)
             elif j == "行为":
                 activity = element
                 print(j, ":", activity)
             elif j == "内容":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 # Strip boilerplate fragments before keyword extraction.
                 for fragment in ("Twitter", "的", ' “@'):
                     element = element.replace(fragment, "")
                 content = element
                 print(j, ":", content)
                 if activity == "":
                     # No activity seen so far for this row: derive it from
                     # whether the user id appears in the content.
                     activity = "Post" if user_id in content else "Reply"
                     print("行为:", activity)
                 temp_keywords = dpt4.main(element)  # expected to be a list
                 print("keywords:", temp_keywords)
                 key_words = key_words + temp_keywords
             elif j == "转推":
                 retweet = element
                 print(j, ":", retweet)
             elif j == "喜欢":
                 like = element
                 print(j, ":", like)
             elif j == "回复":
                 reply_num = element
                 print(j, ":", reply_num)
         _id = self.get_next_counter()
         print("_id:", _id)
         insert_text = {
             "uid": self.collection_name,
             "用户ID": user_id,
             "服务ID": service_id,
             "时间": date_time,
             "timestamp": time_stamp,
             "activity": activity,
             "内容": content,
             "keywords": key_words,
             "转推": retweet,
             "喜欢": like,
             "回复": reply_num,
             "_id": _id
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for var_nan in self.nan_list:
             print("NaN row:", var_nan)
         print("空值的个数:", len(self.nan_list))
예제 #2
0
 def read_file(self):
     """Read the CSV at ``self.file_path`` and emit one record per row.

     For each row the user id, service id, formatted time (via
     ``self.format_datetime``), epoch timestamp, content and keywords are
     gathered into a dict together with an id from
     ``self.get_next_counter``. The dict is printed and, when
     ``self.flag_insert == "1"``, passed to ``self.input_database``.

     A NaN cell in "时间", "内容" or "title" appends the row to
     ``self.nan_list``; the row is still emitted with that field left at
     its default. The collected rows are printed after the loop.
     """
     frame = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     for idx in range(len(frame)):
         print("i:", idx)
         # Per-row defaults; a column missing from the CSV keeps them.
         uid_value = ""
         svc_value = ""
         stamp_text = ""  # formatted date-time string
         epoch = ""  # epoch seconds once a time has been parsed
         body_text = ""
         tokens = []  # accumulated results of dpt4.main
         record = frame.iloc[idx]
         for col in map(str, frame.columns):
             cell = str(record[col])
             if col == "用户ID":
                 uid_value = cell
                 print(col, ":", uid_value)
             elif col == "服务ID":
                 svc_value = cell
                 print(col, ":", svc_value)
             elif col == "时间":
                 if cell == "nan":
                     self.nan_list.append(record)
                     continue
                 print(col, ":", cell)
                 stamp_text = self.format_datetime(cell)
                 print("格式化时间date_time:", stamp_text)
                 # Formatted string -> struct_time -> epoch seconds.
                 parsed = time.strptime(stamp_text, "%Y-%m-%d %H:%M:%S")
                 seconds = int(time.mktime(parsed))
                 print("timestamp:", seconds)
                 epoch = seconds
             elif col == "行为":
                 # The activity is only echoed; it is not stored.
                 print(col, ":", cell)
             elif col in ("内容", "title"):
                 if cell == "nan":
                     self.nan_list.append(record)
                     continue
                 body_text = cell
                 print(col, ":", body_text)
                 found = dpt4.main(cell)  # expected to be a list
                 print("keywords:", found)
                 tokens = tokens + found
         _id = self.get_next_counter()
         insert_text = {
             "uid": self.collection_name,
             "用户ID": uid_value,
             "服务ID": svc_value,
             "时间": stamp_text,
             "timestamp": epoch,
             "内容": body_text,
             "keywords": tokens,
             "_id": _id,
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for missing in self.nan_list:
             print("NaN row:", missing)
         print("空值的个数:", len(self.nan_list))
 def read_file(self):
     """Read the CSV at ``self.file_path`` and emit one record per row.

     For each row the user id, service id, time, epoch timestamp,
     activity, content and keywords are gathered into a dict together
     with an id from ``self.get_next_counter``. The dict is printed and,
     when ``self.flag_insert == "1"``, passed to ``self.input_database``.
     The "title" and "标记" columns are echoed (title also feeds the
     keywords) but are not stored in the record.

     A NaN "时间" cell appends the row to ``self.nan_list``; the row is
     still emitted with empty time fields. The collected rows are printed
     after the loop.
     """
     frame = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     # Fragments removed before the date-only fallback parse.
     # NOTE(review): by the time these are applied every "T" has already
     # been replaced by a space, so "Tue, " and "Thu, " cannot match.
     fallback_noise = (" GM", "Mon, ", "Tue, ", "Wed, ", "Thu, ",
                       "Fri, ", "Sat, ", "Sun, ")
     for idx in range(len(frame)):
         print("i:", idx)
         # Per-row defaults; a column missing from the CSV keeps them.
         uid_value = ""
         svc_value = ""
         stamp_text = ""  # time string as stored in the record
         epoch = ""  # epoch seconds once a time has been parsed
         action = ""
         body_text = ""
         tokens = []  # accumulated results of dpt4.main
         record = frame.iloc[idx]
         for col in map(str, frame.columns):
             cell = str(record[col])
             if col == "用户ID":
                 uid_value = cell
                 print(col, ":", uid_value)
             elif col == "服务ID":
                 svc_value = cell
                 print(col, ":", svc_value)
             elif col == "时间":
                 if cell == "nan":
                     self.nan_list.append(record)
                     continue
                 # "2019-01-01T10:00:00Z" -> "2019-01-01 10:00:00"
                 cell = cell.replace("Z", "").replace("T", " ")
                 stamp_text = cell
                 print(col, ":", stamp_text)
                 try:
                     parsed = time.strptime(cell, "%Y-%m-%d %H:%M:%S")
                 except ValueError:
                     # Fall back to a date-only value; the stored time
                     # string keeps the text from before this clean-up.
                     for fragment in fallback_noise:
                         cell = cell.replace(fragment, "")
                     parsed = time.strptime(cell, "%Y-%m-%d")
                 seconds = int(time.mktime(parsed))
                 print("timestamp:", seconds, " type:", type(seconds))
                 epoch = seconds
             elif col == "行为":
                 action = cell
                 print(col, ":", action)
             elif col in ("内容", "repository"):
                 # Both columns populate the record's content field.
                 body_text = cell
                 print(col, ":", body_text)
                 found = dpt4.main(cell)  # expected to be a list
                 print("keywords:", found)
                 tokens = tokens + found
             elif col == "title":
                 print(col, ":", cell)
                 found = dpt4.main(cell)  # expected to be a list
                 print("keywords:", found)
                 tokens = tokens + found
             elif col == "标记":
                 print(col, ":", cell)
         _id = self.get_next_counter()
         insert_text = {
             "uid": self.collection_name,
             "用户ID": uid_value,
             "服务ID": svc_value,
             "时间": stamp_text,
             "timestamp": epoch,
             "activity": action,
             "内容": body_text,
             "keywords": tokens,
             "_id": _id,
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for missing in self.nan_list:
             print("NaN row:", missing)
         print("空值的个数:", len(self.nan_list))
예제 #4
0
 def read_file(self):
     """Read the CSV at ``self.file_path`` and emit one record per row.

     For each row the user id, service id, time, epoch timestamp,
     activity, content, title and keywords are gathered into a dict
     together with an id from ``self.get_next_counter``. The dict is
     printed and, when ``self.flag_insert == "1"``, passed to
     ``self.input_database``.

     The "时间" cell is passed through ``self.match_date_time`` and the
     result is parsed with the layout "%Y年%m月%d日". A NaN "时间" cell
     appends the row to ``self.nan_list``; the row is still emitted with
     empty time fields. The collected rows are printed after the loop.
     """
     frame = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     for idx in range(len(frame)):
         print("i:", idx)
         # Per-row defaults; a column missing from the CSV keeps them.
         uid_value = ""
         svc_value = ""
         stamp_text = ""  # formatted as "%Y-%m-%d %H:%M:%S"
         epoch = ""  # epoch seconds once a time has been parsed
         action = ""
         body_text = ""
         heading = ""
         tokens = []  # accumulated results of dpt4.main
         record = frame.iloc[idx]
         for col in map(str, frame.columns):
             cell = str(record[col])
             if col == "用户ID":
                 uid_value = cell
                 print(col, ":", uid_value)
             elif col == "服务ID":
                 svc_value = cell
                 print(col, ":", svc_value)
             elif col == "时间":
                 if cell == "nan":
                     self.nan_list.append(record)
                     continue
                 cell = self.match_date_time(cell)
                 print(col, ":", cell)
                 # Date string -> struct_time -> epoch seconds ...
                 parsed = time.strptime(cell, "%Y年%m月%d日")
                 epoch = int(time.mktime(parsed))
                 # ... and back to a formatted local time string.
                 stamp_text = time.strftime("%Y-%m-%d %H:%M:%S",
                                            time.localtime(epoch))
                 print("date_time:", stamp_text, ", timestamp:", epoch)
             elif col == "行为":
                 action = cell
                 print(col, ":", action)
             elif col == "内容":
                 body_text = cell
                 print(col, ":", body_text)
                 found = dpt4.main(cell)  # expected to be a list
                 print("keywords:", found)
                 tokens += found
             elif col in ("title", "Title"):
                 heading = cell
                 print(col, ":", heading)
                 found = dpt4.main(cell)  # expected to be a list
                 print("keywords:", found)
                 tokens += found
         _id = self.get_next_counter()
         insert_text = {
             "uid": self.collection_name,
             "用户ID": uid_value,
             "服务ID": svc_value,
             "时间": stamp_text,
             "timestamp": epoch,
             "activity": action,
             "内容": body_text,
             "keywords": tokens,
             "_id": _id,
             "title": heading,
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for missing in self.nan_list:
             print("NaN row:", missing)
         print("空值的个数:", len(self.nan_list))
예제 #5
0
 def read_file(self):
     """Read the CSV at ``self.file_path`` and emit one record per row.

     For each row the user id, service id, time, epoch timestamp,
     content, title and keywords are gathered into a dict. The dict is
     printed and, when ``self.flag_insert == "1"``, passed to
     ``self.input_database``.

     The "时间" cell is passed through ``self.match_timestamp`` and
     ``self.format_datetime`` and then parsed as "%d %m %Y, %H:%M" with an
     "am"/"pm" marker. The "content_time" cell is expected to look like
     "<content>_<%Y-%m-%d %H:%M:%S>". A NaN cell in either column appends
     the row to ``self.nan_list``; the row is still emitted. The collected
     rows are printed after the loop.

     Changes compared with the previous revision:
       * 12-hour clock handling: "12:xxam" now maps to 00:xx and
         "12:xxpm" stays at 12:xx instead of rolling to the next day;
       * "content_time" is split on its LAST underscore, so content that
         itself contains "_" is no longer cut short;
       * the unused retweet / like / reply locals were removed.

     Raises:
         IndexError: if a "content_time" cell contains no underscore.
         ValueError: if a time string does not match its expected layout.
     """
     data = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     clock_layout = "%d %m %Y, %H:%M"
     for i in range(len(data)):
         print("i:", i)
         # Per-row defaults; a column missing from the CSV keeps them.
         user_id = ""
         service_id = ""
         date_time = ""  # formatted as "%Y-%m-%d %H:%M:%S"
         time_stamp = ""  # epoch seconds once a time has been parsed
         title = ""
         content = ""
         key_words = []  # accumulated results of dpt4.main
         row = data.iloc[i]  # fetched once instead of once per cell
         for j in data.columns:
             j = str(j)
             element = str(row[j])
             if j == "用户ID":
                 user_id = element
                 print(j, ":", user_id)
             elif j == "服务ID":
                 service_id = element
                 print(j, ":", service_id)
             elif j == "时间":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 element = self.match_timestamp(element)
                 element = self.format_datetime(element)
                 print(j, ":", element)
                 if "am" in element:
                     element = element.replace("am", "")
                     dt = datetime.datetime.strptime(element, clock_layout)
                     if dt.hour == 12:
                         # 12 AM is midnight on a 12-hour clock.
                         dt = dt.replace(hour=0)
                     date_time = str(dt)
                     print("时间:", date_time)
                     time_stamp = int(time.mktime(dt.timetuple()))
                     print("timestamp:", time_stamp)
                 if "pm" in element:
                     element = element.replace("pm", "")
                     dt = datetime.datetime.strptime(element, clock_layout)
                     print("dt:", dt)
                     # PM adds 12 hours, except 12 PM which is noon.
                     if dt.hour < 12:
                         now = dt + timedelta(hours=12)
                     else:
                         now = dt
                     print("new now:", now)
                     date_time = str(now)
                     time_stamp = int(time.mktime(now.timetuple()))
                     print("timestamp:", time_stamp)
             elif j == "title":
                 title = element
                 print(j, ":", title)
                 temp_keywords = dpt4.main(title)  # expected to be a list
                 key_words += temp_keywords
                 print("keywords:", temp_keywords)
             elif j == "content_time":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 # The time layout has no underscore, so the last "_" is
                 # the separator even when the content contains one.
                 list_ele = element.rsplit("_", 1)
                 content = list_ele[0]
                 date_time = list_ele[1]
                 print("内容:", content)
                 print("时间:", date_time)
                 timeArray = time.strptime(str(date_time),
                                           "%Y-%m-%d %H:%M:%S")
                 time_stamp = int(time.mktime(timeArray))
                 print("timestamp:", time_stamp)
                 temp_keywords = dpt4.main(content)  # expected to be a list
                 print("keywords:", temp_keywords)
                 key_words = key_words + temp_keywords
         insert_text = {
             "uid": self.collection_name,
             "用户ID": user_id,
             "服务ID": service_id,
             "时间": date_time,
             "timestamp": time_stamp,
             "内容": content,
             "title": title,
             "keywords": key_words
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for var_nan in self.nan_list:
             print("NaN row:", var_nan)
         print("空值的个数:", len(self.nan_list))
 def read_file(self):
     """Read the CSV at ``self.file_path`` and insert one record per row.

     Two column layouts are handled:
       * "ATT": a comma-separated cell whose parts 0..3 are read as user
         id, service id, content and an ISO-like time ("T" separator,
         optional "+00:00" suffix);
       * "时间": a 12-hour time with an "上午"/"下午" marker, parsed with
         the layout "%H:%M - %Y年%m月%d日".

     Each record is printed and always passed to ``self.input_database``
     (this variant does not consult ``self.flag_insert``). A NaN "时间"
     cell appends the row to ``self.nan_list``; the row is still emitted.
     The collected rows are printed after the loop.

     Changes compared with the previous revision:
       * the "下午" branch now stores the formatted time in the record;
         it used to leave "时间" empty for every PM row;
       * 12-hour clock handling: "上午12:xx" now maps to 00:xx and
         "下午12:xx" stays at 12:xx instead of rolling to the next day.
     """
     data = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     clock_layout = "%H:%M - %Y年%m月%d日"
     for i in range(len(data)):
         print("i:", i)
         # Per-row defaults; a column missing from the CSV keeps them.
         user_id = ""
         service_id = ""
         date_time = ""  # formatted as "%Y-%m-%d %H:%M:%S"
         time_stamp = ""  # epoch seconds once a time has been parsed
         content = ""
         key_words = []  # accumulated results of dpt4.main
         row = data.iloc[i]  # fetched once instead of once per cell
         for j in data.columns:
             j = str(j)
             element = str(row[j])
             if j == "ATT":
                 for item, part in enumerate(element.split(",")):
                     print("item:", item, "element:", part)
                     if item == 0:
                         user_id = part
                     elif item == 1:
                         service_id = part
                     elif item == 2:
                         content = part
                         temp_keywords = dpt4.main(content)  # expected list
                         print("keywords:", temp_keywords)
                         key_words = key_words + temp_keywords
                     elif item == 3:
                         # "2019-01-01T10:00:00+00:00" -> plain date-time
                         iso_text = part.replace("T", " ")
                         iso_text = iso_text.replace("+00:00", "")
                         dt = datetime.datetime.strptime(
                             iso_text, "%Y-%m-%d %H:%M:%S")
                         date_time = str(dt)
                         print("时间:", date_time)
                         time_stamp = int(time.mktime(dt.timetuple()))
                         print("timestamp:", time_stamp)
             elif j == "时间":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 print(j, ":", element)
                 if "上午" in element:
                     element = element.replace("上午", "")
                     dt = datetime.datetime.strptime(element, clock_layout)
                     if dt.hour == 12:
                         # 12 AM is midnight on a 12-hour clock.
                         dt = dt.replace(hour=0)
                     date_time = str(dt)
                     print("时间:", date_time)
                     time_stamp = int(time.mktime(dt.timetuple()))
                     print("timestamp:", time_stamp)
                 if "下午" in element:
                     element = element.replace("下午", "")
                     dt = datetime.datetime.strptime(element, clock_layout)
                     print("dt:", dt)
                     # PM adds 12 hours, except 12 PM which is noon.
                     if dt.hour < 12:
                         now = dt + timedelta(hours=12)
                     else:
                         now = dt
                     print("new now:", now)
                     date_time = str(now)  # previously never stored
                     time_stamp = int(time.mktime(now.timetuple()))
                     print("timestamp:", time_stamp)
         insert_text = {
             "uid": self.collection_name,
             "用户ID": user_id,
             "服务ID": service_id,
             "时间": date_time,
             "timestamp": time_stamp,
             "内容": content,
             "keywords": key_words
         }
         print("row_input_list:", insert_text)
         # This variant inserts unconditionally.
         self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for var_nan in self.nan_list:
             print("NaN row:", var_nan)
         print("空值的个数:", len(self.nan_list))
 def read_file(self):
     """Read the CSV at ``self.file_path`` and emit one record per row.

     For each row the user id, service id, epoch timestamp (from the
     "timestamp" column), its formatted local time, content and keywords
     are gathered into a dict together with an id from
     ``self.get_next_counter``. The dict is printed and, when
     ``self.flag_insert == "1"``, passed to ``self.input_database``.
     Both "内容" and "title" populate the record's content field.

     A NaN "timestamp" cell appends the row to ``self.nan_list``; the row
     is still emitted with empty time fields. The collected rows are
     printed after the loop.

     Changes compared with the previous revision:
       * the timestamp is converted with ``int(float(...))``. The old
         ``replace(".0", "")`` removed every ".0" occurrence, corrupting
         values such as "1500000000.05" and failing on other fractional
         values;
       * the unused ``activity`` local was removed and the duplicated
         "内容"/"title" branches were merged.
     """
     data = pd.read_csv(self.file_path, encoding='utf-8')  # load the CSV
     print("data:")
     for i in range(len(data)):
         print("i:", i)
         # Per-row defaults; a column missing from the CSV keeps them.
         user_id = ""
         service_id = ""
         date_time = ""  # formatted as "%Y-%m-%d %H:%M:%S"
         time_stamp = ""  # epoch seconds once a timestamp has been read
         content = ""
         key_words = []  # accumulated results of dpt4.main
         row = data.iloc[i]  # fetched once instead of once per cell
         for j in data.columns:
             j = str(j)
             element = str(row[j])
             if j == "用户ID":
                 user_id = element
                 print(j, ":", user_id)
             elif j == "服务ID":
                 service_id = element
                 print(j, ":", service_id)
             elif j == "timestamp":
                 if element == "nan":
                     self.nan_list.append(row)
                     continue
                 # The cell may be rendered as a float ("1500000000.0");
                 # truncate numerically rather than by editing the text.
                 time_stamp = int(float(element))
                 element = str(time_stamp)
                 print("element:", element, ", type:", type(element))
                 # Epoch seconds -> formatted local time string.
                 date_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                           time.localtime(time_stamp))
                 print("date_time:", date_time, ", timestamp:", time_stamp)
             elif j in ("内容", "title"):
                 # Strip boilerplate fragments before keyword extraction.
                 for fragment in ("Idan Adar", "的", ' “@'):
                     element = element.replace(fragment, "")
                 content = element
                 print(j, ":", content)
                 temp_keywords = dpt4.main(element)  # expected to be a list
                 print("keywords:", temp_keywords)
                 key_words = key_words + temp_keywords
         _id = self.get_next_counter()
         print("_id:", _id)
         insert_text = {
             "uid": self.collection_name,
             "用户ID": user_id,
             "服务ID": service_id,
             "时间": date_time,
             "timestamp": time_stamp,
             "内容": content,
             "keywords": key_words,
             "_id": _id
         }
         print("row_input_list:", insert_text)
         # Insert only when the flag is switched on.
         if self.flag_insert == "1":
             self.input_database(insert_text)
         print()
     # Report the rows that contained NaN cells.
     if self.nan_list:
         for var_nan in self.nan_list:
             print("NaN row:", var_nan)
         print("空值的个数:", len(self.nan_list))