コード例 #1
0
class GetAnswerDoctor(object):
    """Build a ``Doctor`` for every stored doctor URL.

    The URL rows are read from ``url_table`` through ``MySQLDatabaseClass``;
    the three remaining table names are only forwarded to ``Doctor``.
    """

    def __init__(self, url_table, answer_doctor_table, family_doctor_table,
                 zhuanjia_doctor_table):
        """Store the table names; no database connection is opened here.

        :param url_table: table that ``get_url`` selects the URL rows from.
        :param answer_doctor_table: forwarded to ``Doctor``.
        :param family_doctor_table: forwarded to ``Doctor``.
        :param zhuanjia_doctor_table: forwarded to ``Doctor``.
        """
        # Filled by get_url(); starts empty so get_doctor_info() is a no-op
        # until URLs have been loaded.
        self.doctor_url = []
        self.url_table = url_table
        self.answer_doctor_table = answer_doctor_table
        self.family_doctor_table = family_doctor_table
        self.zhuanjia_doctor_table = zhuanjia_doctor_table

    def get_url(self):
        """Open a connection, load all rows of ``url_table`` and return them.

        The connection is kept on ``self.mysql`` because ``get_doctor_info``
        hands it to every ``Doctor`` it creates.
        """
        connection = MySQLDatabaseClass()
        self.mysql = connection
        self.doctor_url = connection.select(table=self.url_table)
        return self.doctor_url

    def get_doctor_info(self):
        """Create a ``Doctor`` per loaded row and process family-doctor URLs.

        Each row is indexed with ``'doctor_url'``. Only URLs containing
        ``'family'`` trigger work (``Doctor.get_family_doctor``).
        """
        for row in self.doctor_url:
            link = row['doctor_url']
            crawler = Doctor(url=link,
                             answer_doctor_table=self.answer_doctor_table,
                             family_doctor_table=self.family_doctor_table,
                             zhuanjia_doctor_table=self.zhuanjia_doctor_table,
                             mysql=self.mysql)
            if 'family' in link:
                crawler.get_family_doctor()
            # The handlers for 'doc_card' URLs (get_answer_doctor) and for
            # 'z.xywy.com/doc' URLs (get_zhuanjia_doctor) are switched off,
            # so such URLs get a Doctor instance but no further processing.
コード例 #2
0
class IntoMySQL(object):
    """Read crawled book records from a JSON-lines file.

    Each line of the data file is one JSON object describing a book; it must
    carry the keys ``used_book_list`` and ``used_book_page``.
    """

    def __init__(self, book_table, used_book_table):
        """Open a database connection and remember the target tables.

        :param book_table: table name intended for book records.
        :param used_book_table: table name used by ``_write_used_book``.
        """
        self.mysql = MySQLDatabaseClass()
        self.book_table = book_table
        self.used_book_table = used_book_table
        # Number of lines read so far by write_mysql().
        self.total_number = 0
        # Only referenced by the disabled insertion code in write_mysql().
        self.drop_number = 0

    def write_mysql(self, data_file_name):
        """Scan the data file and report books cheaper new than used.

        For every line the record is parsed, ``total_number`` is incremented
        and, when ``amazon_price`` is lower than ``lowest_used_price``, the
        full original record is pretty-printed. A failing price comparison
        (for example a missing key) is printed and does not stop the scan.

        NOTE: the database insertion itself is currently disabled (see the
        commented block at the end of the loop), so nothing is written.

        :param data_file_name: path of the JSON-lines file to read.
        :raises KeyError: if a record lacks ``used_book_list`` or
            ``used_book_page``.
        """
        # The context manager closes the file even if a line fails to parse.
        with open(data_file_name, "r") as data_file:
            for line in data_file:
                self.total_number += 1
                book = json.loads(line)
                # pop() detaches the list from the record, so no copy is
                # needed to keep the two independent.
                used_book_list = book.pop("used_book_list")
                book.pop("used_book_page")
                try:
                    book["number_of_used_crawled"] = len(used_book_list)
                except TypeError:
                    # used_book_list has no length (e.g. JSON null).
                    book["number_of_used_crawled"] = 0

                try:
                    if book["amazon_price"] < book["lowest_used_price"]:
                        # Re-parse the line to show the untouched record,
                        # including the keys popped from ``book`` above.
                        print("#############")
                        print(json.dumps(json.loads(line), indent=2))
                except Exception as e:
                    print(traceback.format_exc(), e.args[0])

                # Disabled persistence step, kept for reference:
                # if book["ISBN_13"] is not None and book["ISBN_10"] is not None:
                #     self.mysql.insert(table=self.book_table, record=book)
                #     print "writing used books..."
                #     if used_book_list is not None and len(used_book_list) > 0:
                #         self._write_used_book(used_book_list=used_book_list, isbn_13=book["ISBN_13"],isbn_10=book["ISBN_10"])
                # else:
                #     self.drop_number += 1

    def _write_used_book(self, used_book_list, isbn_13, isbn_10):
        """Tag each used-book record with its ISBNs and insert it.

        Every record is modified in place: it receives ``ISBN_13``,
        ``ISBN_10`` and an ``ISBN_index`` of the form
        ``<isbn_10>-<isbn_13>-<position>`` before being inserted into
        ``used_book_table``.

        :param used_book_list: sequence of dict records.
        :param isbn_13: string ISBN-13 of the parent book.
        :param isbn_10: string ISBN-10 of the parent book.
        """
        for index, used_book in enumerate(used_book_list):
            used_book["ISBN_index"] = isbn_10 + '-' + isbn_13 + '-' + str(
                index)
            used_book["ISBN_13"] = isbn_13
            used_book["ISBN_10"] = isbn_10
            self.mysql.insert(table=self.used_book_table, record=used_book)
コード例 #3
0
 def update_post(self):
     """Rewrite every row of the ``post`` table with its detailed version.

     Each selected row is passed through ``self.get_post_detail`` and the
     returned record is written back, matched on its ``post_url``.
     """
     mysql = MySQLDatabaseClass()
     try:
         for post in mysql.select(table='post'):
             detail = self.get_post_detail(post)
             mysql.update(table='post',
                          record=detail,
                          primary_key={'post_url': detail['post_url']})
     finally:
         # Release the connection even when a lookup or an update raises.
         mysql.close()
コード例 #4
0
 def get_post(self):
     """Insert the posts found under every target URL into ``post``.

     For each entry of ``self.target_url_list`` the records returned by
     ``self.get_post_url`` are printed and inserted one by one.
     """
     mysql = MySQLDatabaseClass()
     try:
         for url in self.target_url_list:
             for post in self.get_post_url(url=url):
                 # Single-argument form prints the same on Python 2 and 3.
                 print(post)
                 mysql.insert(table='post', record=post)
     finally:
         # Release the connection even when a fetch or an insert raises.
         mysql.close()
コード例 #5
0
def into_database(file_name):
    """Insert every JSON line of a file into the ``question`` table.

    Each line is parsed as a JSON object, tagged with the file's base name
    under ``'file_name'``, printed and inserted. The database connection is
    replaced with a fresh one once more than 100000 records have been
    inserted through it.

    :param file_name: path of the JSON-lines file; the part after the last
        ``'/'`` is what gets stored with each record.
    """
    # More than this many inserts on one connection triggers a reconnect.
    reconnect_after = 100000
    with open(file_name, 'r') as data_file:
        file_name = file_name.split('/')[-1]
        mysql = MySQLDatabaseClass()
        try:
            inserted_on_connection = 0
            for number, line in enumerate(data_file):
                record = json.loads(line)
                record['file_name'] = file_name
                print(file_name, number, record)
                mysql.insert(table='question', record=record)
                inserted_on_connection += 1
                if inserted_on_connection > reconnect_after:
                    mysql.close()
                    mysql = MySQLDatabaseClass()
                    inserted_on_connection = 0
        finally:
            # Close the connection still open at the end of the run (or at
            # the point of failure).
            mysql.close()
コード例 #6
0
 def update_doctor_info():
     """
     跟新这次抓取的医生信息
     :return:
     """
     mysql = MySQLDatabaseClass()
     doctor_dict_list = mysql.select(
         table=table_info['doctor_communication'],
         record={'crawl_number': crawl_number})
     for doctor_dict in doctor_dict_list:
         url = doctor_dict['doctor_url']
         doctor = DoctorSpider(url=url, crawl_number=crawl_number)
         doctor_info = doctor.get_number()
         if doctor_info != None:
             mysql.update(table=table_info['doctor_communication'],
                          record=doctor_info,
                          primary_key={
                              'doctor_url': url,
                              'crawl_number': doctor_dict['crawl_number']
                          })
             print '医生信息已入库', url
         else:
             pass
     mysql.close()
コード例 #7
0
 def __init__(self, message, doctor_url_split, crawl_number):
     """Decode one message and open a database connection.

     :param message: mapping with a ``'type'`` entry and a ``'content'``
         entry holding a JSON document.
     :param doctor_url_split: stored unchanged on the instance.
     :param crawl_number: stored unchanged on the instance.
     """
     kind, raw_content = message['type'], message['content']
     self.type = kind
     # The content is parsed before the connection is opened.
     self.post_comment = json.loads(raw_content)
     self.mysql = MySQLDatabaseClass()
     self.crawl_number = crawl_number
     self.doctor_url_split = doctor_url_split
コード例 #8
0
class InsertDatabase(object):
    def __init__(self, message, doctor_url_split, crawl_number):
        self.type = message['type']
        self.post_comment = json.loads(message['content'])
        self.mysql = MySQLDatabaseClass()
        self.doctor_url_split = doctor_url_split
        self.crawl_number = crawl_number

    def process(self):
        post = self.post_comment['post']
        comment_list = self.post_comment['comment_list']
        self.mysql.insert(table=table_info[self.type]['post'], record=post)
        print '***帖子信息已入库'
        index = 1
        for comment in comment_list:
            self.mysql.insert(table=table_info[self.type]['comment_first'],
                              record=comment['comment_first'])
            print '***第', str(index), '个一级评论已入库'
            index = index + 1
            for comment_second in comment['comment_second_list']:
                parent_comment_list = self.mysql.select(
                    table=table_info[self.type]['comment_first'],
                    record=comment['comment_first'])
                if len(parent_comment_list) == 0:
                    print 'Error, not find parent comment for:', comment_second
                    pass
                else:
                    parent_comment = parent_comment_list[0]
                    comment_second[
                        'parent_comment_doctor_url'] = parent_comment[
                            'doctor_url']
                    comment_second[
                        'parent_comment_comment_time'] = parent_comment[
                            'comment_time']
                    comment_second[
                        'parent_comment_comment_content'] = parent_comment[
                            'comment_content']
                    self.mysql.insert(
                        table=table_info[self.type]['comment_second'],
                        record=comment_second)
                    print '***二级评论已入库'
                    pass
        self.insert_doctor_url(doctor_url_list=post['post_like_doctor_url'],
                               split=True)
        self.insert_doctor_url(doctor_url_list=post['post_comment_doctor_url'],
                               split=True)
        self.insert_doctor_url(doctor_url_list=[post['post_doctor_url']])

    def insert_doctor_url(self, doctor_url_list, split=False):
        if split == True:
            doctor_url_list = doctor_url_list.split(self.doctor_url_split)
        else:
            pass
        for url in doctor_url_list:
            if 'club.xywy.com/doc_card' in url:
                doctor_info = {}
                doctor_info['doctor_url'] = url
                doctor_info['crawl_number'] = self.crawl_number
                print doctor_info
                self.mysql.insert(table=table_info['doctor_communication'],
                                  record=doctor_info)
                doctor_info['crawl_time'] = datetime.datetime.now().strftime(
                    '%Y-%m-%d')
                self.mysql.insert(table=table_info['doctor_url'],
                                  record=doctor_info)

    @staticmethod
    def update_doctor_info():
        """
        跟新这次抓取的医生信息
        :return:
        """
        mysql = MySQLDatabaseClass()
        doctor_dict_list = mysql.select(
            table=table_info['doctor_communication'],
            record={'crawl_number': crawl_number})
        for doctor_dict in doctor_dict_list:
            url = doctor_dict['doctor_url']
            doctor = DoctorSpider(url=url, crawl_number=crawl_number)
            doctor_info = doctor.get_number()
            if doctor_info != None:
                mysql.update(table=table_info['doctor_communication'],
                             record=doctor_info,
                             primary_key={
                                 'doctor_url': url,
                                 'crawl_number': doctor_dict['crawl_number']
                             })
                print '医生信息已入库', url
            else:
                pass
        mysql.close()

    def close(self):
        self.mysql.close()
コード例 #9
0
 def __init__(self, book_table, used_book_table):
     """Open a database connection and reset the counters.

     :param book_table: table name stored on the instance.
     :param used_book_table: table name stored on the instance.
     """
     # The connection is created first, before any other attribute is set.
     self.mysql = MySQLDatabaseClass()
     self.book_table, self.used_book_table = book_table, used_book_table
     # Both counters start at zero.
     self.total_number = self.drop_number = 0
コード例 #10
0
 def get_url(self):
     """Open a connection, load all rows of ``self.url_table``, return them.

     The connection is kept on ``self.mysql`` and the rows on
     ``self.doctor_url``.
     """
     connection = MySQLDatabaseClass()
     # Stored before the query runs, exactly like the attribute assignment
     # order of a direct ``self.mysql = ...`` followed by a select.
     self.mysql = connection
     self.doctor_url = connection.select(table=self.url_table)
     return self.doctor_url