Ejemplo n.º 1
0
    def _get_rating(self, rating):
        try:
            return int(rating.lstrip(u'rank_now'))
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 2
0
    def _get_rating(self, rating):
        try:
            return int(rating.lstrip(u'rank_now'))
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 3
0
 def _get_rating(self, rating):
     try:
         if rating:
             rating = float(-int(rating.split()[1][:-3])) / 30
             return rating
     except Exception as e:
         log_error(e)
     return 0
Ejemplo n.º 4
0
 def _get_rating(self, rating):
     try:
         if rating:
             rating = float(-int(rating.split()[1][:-3])) / 30
             return rating
     except Exception as e:
         log_error(e)
     return 0
Ejemplo n.º 5
0
 def item_completed(self, results, item, info):
     """Record the stored path of the item's icon.

     Only the first ``(ok, result)`` pair in ``results`` is examined: on
     success its ``'path'`` is stored as ``item['icon_path']``, otherwise
     the failure is logged. The item is returned in every case.
     """
     for succeeded, payload in results:
         if succeeded:
             item['icon_path'] = payload['path']
         else:
             log_error("fail to download icon for %s" % item['source_link'])
         # Later entries are deliberately ignored.
         break
     return item
Ejemplo n.º 6
0
 def item_completed(self, results, item, info):
     """Record the stored path of the item's icon.

     Only the first ``(ok, result)`` pair in ``results`` is examined: on
     success its ``'path'`` is stored as ``item['icon_path']``, otherwise
     the failure is logged. The item is returned in every case.
     """
     for succeeded, payload in results:
         if succeeded:
             item['icon_path'] = payload['path']
         else:
             log_error("fail to download icon for %s" % item['source_link'])
         # Later entries are deliberately ignored.
         break
     return item
Ejemplo n.º 7
0
    def parse_item(self, response):
        """Parse a leaf page into an item.

        Steps, in order:
          1. Rewrite the request URL with ``sourcelinkprocessor_class`` when
             one is configured.
          2. Run ``_process_response``; on failure report a FAIL status,
             remove the app via ``market.remove_app`` and stop.
          3. Unless the spider name starts with ``update.`` or equals
             ``itunes.apple.com``, call ``self.parse`` on the response (its
             return value is not used).
          4. Build an item loader, add ``source`` and ``source_link``, load
             the item and return it when ``is_item_valid`` accepts it;
             otherwise remove the app.

        Returns:
            The loaded item, or ``None`` when the response was rejected, no
            loader class is configured, the item is invalid, or an error
            occurred (errors are logged and reported, not raised).
        """
        meta = response.request.meta
        source = meta['domain']
        log_info('parse_item_1===========')
        url = response.request.url
        if self.sourcelinkprocessor_class:
            processor = self.sourcelinkprocessor_class()
            url = processor.process(url)

        if not self._process_response(response, source, LinkType.LEAF):
            # NOTE(review): unless this module defines its own ``type``, the
            # builtin is passed as the link type here; LinkType.LEAF may
            # have been intended -- confirm before changing.
            service.report_status([
                LinkStatus(meta['redirect_urls'][0], source, Status.FAIL, type)
            ])
            market.remove_app(url, source)
            log_info('parse_item_2===========')
            return

        if not self.name.startswith(
                'update.') and self.name != 'itunes.apple.com':
            self.parse(response)

        if source.endswith('hiapk.com'):
            # Swap '</br>' for '<p>' in the body before it is parsed.
            body = response.body.replace('</br>', '<p>')
            response = response.replace(body=body)

        if not self.itemloader_class:
            log_info('parse_item_3===========')
            return

        try:
            selector = HtmlXPathSelector(response)
            try:
                loader = self.itemloader_class(selector, response=response)
            except Exception:
                # Retry without the ``response`` keyword for loader classes
                # that cannot be constructed with it.
                loader = self.itemloader_class(selector)
            loader.add_value('source', source)
            loader.add_value('source_link', url)
        except Exception as e:
            log_info('parse_item_4===========\n%s' % e)
            log_error(e)
            failure = [LinkStatus(url, source, Status.FAIL, LinkType.UNKNOWN)]
            if self.name.startswith('update.'):
                service.report_update_status(failure)
            else:
                service.report_status(failure)
            # The failure has been reported and ``loader`` may not even be
            # bound, so stop here instead of falling through to load_item().
            return

        log_info('parse_item_5===========')
        try:
            item = loader.load_item()
            if self.is_item_valid(item):
                return item
            market.remove_app(url, source)
        except Exception as e:
            log_error(e)
Ejemplo n.º 8
0
 def item_completed(self, results, item, info):
     """Collect the stored paths of the item's downloaded images.

     Each ``(ok, result)`` pair in ``results`` either contributes its
     ``'path'`` or is logged as a failed download. The collected paths are
     joined with single spaces into ``item['images_path']`` and the item is
     returned.
     """
     stored = []
     for succeeded, payload in results:
         if succeeded:
             stored.append(payload['path'])
             continue
         log_error("fail to download image for %s" % item['source_link'])
     item['images_path'] = ' '.join(stored)
     return item
Ejemplo n.º 9
0
 def item_completed(self, results, item, info):
     """Collect the stored paths of the item's downloaded images.

     Each ``(ok, result)`` pair in ``results`` either contributes its
     ``'path'`` or is logged as a failed download. The collected paths are
     joined with single spaces into ``item['images_path']`` and the item is
     returned.
     """
     stored = []
     for succeeded, payload in results:
         if succeeded:
             stored.append(payload['path'])
             continue
         log_error("fail to download image for %s" % item['source_link'])
     item['images_path'] = ' '.join(stored)
     return item
Ejemplo n.º 10
0
 def process_item(self, item, spider):
     """Run the item through the adapter registered for its source.

     The adapter is looked up with ``item.get('source')``; when the lookup
     yields nothing truthy the item is returned unchanged.

     Raises:
         DropItem: if the lookup or the adaptation raises; the error is
             printed and logged first.
     """
     try:
         adapter = ItemAdapterFactory.get_itemadapter(item.get('source'))
         return adapter.adapt(item) if adapter else item
     except Exception as err:
         # Parenthesised single argument: still the plain print statement
         # on Python 2, with identical output.
         print("------------------------%s" % err)
         log_error(err)
         raise DropItem()
Ejemplo n.º 11
0
 def process_item(self, item, spider):
     """Run the item through the adapter registered for its source.

     The adapter is looked up with ``item.get('source')``; when the lookup
     yields nothing truthy the item is returned unchanged.

     Raises:
         DropItem: if the lookup or the adaptation raises; the error is
             printed and logged first.
     """
     try:
         adapter = ItemAdapterFactory.get_itemadapter(item.get('source'))
         return adapter.adapt(item) if adapter else item
     except Exception as err:
         # Parenthesised single argument: still the plain print statement
         # on Python 2, with identical output.
         print("------------------------%s" % err)
         log_error(err)
         raise DropItem()
Ejemplo n.º 12
0
 def _get_download_link(self, download_link):
     try:
         if download_link:
             download_link_match = self._download_link_pattern.search(download_link)
             if download_link_match:
                 download_link = '%s/dl_app.php?s=%s' % (self._base_url, download_link_match.group(1))
             return download_link
     except Exception as e:
         log_error(e)
         return download_link
Ejemplo n.º 13
0
def _thrift_call(func):
    """Open a Thrift connection to the link service and run ``func`` on it.

    Args:
        func: callable that receives the ``Links.Client`` instance.

    Returns:
        Whatever ``func`` returns, or ``None`` when a ``Thrift.TException``
        is raised (it is logged, not propagated). Other exceptions
        propagate to the caller.
    """
    transport = None
    try:
        sock = TSocket.TSocket(SERVICE_CONFIG['host'], SERVICE_CONFIG['port'])
        transport = TTransport.TBufferedTransport(sock)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Links.Client(protocol)
        transport.open()
        return func(client)
    except Thrift.TException as tx:
        log_error(tx)
    finally:
        # ``transport`` is still None when setup failed before it was built;
        # closing unconditionally would raise and mask the original error.
        if transport is not None:
            transport.close()
Ejemplo n.º 14
0
def save_download_link(item):
    """Insert the item's URL into ``apk_links`` and commit.

    The statement is ``insert ignore`` with ``updated_at`` set to
    ``now()``. Does nothing for a falsy ``item``. Any error is logged and
    swallowed.
    """
    if not item:
        return
    cursor = None
    try:
        cursor = _conn.cursor()
        sql = 'insert ignore into apk_links(link, updated_at) values (%s, now())'
        cursor.execute(sql, (item['url'], ))
        _conn.commit()
    except Exception as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed; closing it
        # blindly would raise from the finally block.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 15
0
def report_link(source, catetory, link, description=''):
    """Insert one row into the link monitor table and commit.

    Args:
        source: value for the ``source`` column.
        catetory: value for the ``category`` column (parameter name kept
            as-is for existing callers).
        link: value for the ``link`` column.
        description: value for the ``description`` column.

    ``create_time`` is filled from ``get_epoch_datetime()``.
    ``MySQLdb.Error`` is logged and swallowed.
    """
    cursor = None
    try:
        cursor = _conn.cursor()

        # The table name and the internally generated timestamp are
        # formatted into the statement text as before; the caller-supplied
        # values are bound as parameters so quotes in them can neither
        # break nor alter the statement.
        insert_sql = (
            "INSERT INTO %s (source, category, link, description, create_time) "
            "VALUES(%%s, %%s, %%s, %%s, %s)"
        ) % (_link_monitor_table, get_epoch_datetime())
        cursor.execute(insert_sql, (source, catetory, link, description))
        _conn.commit()
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 16
0
def save_download_link(item):
    """Insert the item's URL into ``apk_links`` and commit.

    The statement is ``insert ignore`` with ``updated_at`` set to
    ``now()``. Does nothing for a falsy ``item``. Any error is logged and
    swallowed.
    """
    if not item:
        return
    cursor = None
    try:
        cursor = _conn.cursor()
        sql = 'insert ignore into apk_links(link, updated_at) values (%s, now())'
        cursor.execute(sql, (item['url'], ))
        _conn.commit()
    except Exception as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed; closing it
        # blindly would raise from the finally block.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 17
0
def report_link(source, catetory, link, description=''):
    """Insert one row into the link monitor table and commit.

    Args:
        source: value for the ``source`` column.
        catetory: value for the ``category`` column (parameter name kept
            as-is for existing callers).
        link: value for the ``link`` column.
        description: value for the ``description`` column.

    ``create_time`` is filled from ``get_epoch_datetime()``.
    ``MySQLdb.Error`` is logged and swallowed.
    """
    cursor = None
    try:
        cursor = _conn.cursor()

        # The table name and the internally generated timestamp are
        # formatted into the statement text as before; the caller-supplied
        # values are bound as parameters so quotes in them can neither
        # break nor alter the statement.
        insert_sql = (
            "INSERT INTO %s (source, category, link, description, create_time) "
            "VALUES(%%s, %%s, %%s, %%s, %s)"
        ) % (_link_monitor_table, get_epoch_datetime())
        cursor.execute(insert_sql, (source, catetory, link, description))
        _conn.commit()
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 18
0
 def _get_download_link(self, download_link):
     try:
         if download_link:
             download_link_match = self._download_link_pattern.search(
                 download_link)
             if download_link_match:
                 download_link = '%s/dl_app.php?s=%s' % (
                     self._base_url, download_link_match.group(1))
             return download_link
     except Exception as e:
         log_error(e)
         return download_link
Ejemplo n.º 19
0
    def parse_item(self, response):
        """Parse a leaf page into an item.

        Steps, in order:
          1. Rewrite the request URL with ``sourcelinkprocessor_class`` when
             one is configured.
          2. Run ``_process_response``; on failure report a FAIL status,
             remove the app via ``market.remove_app`` and stop.
          3. Unless the spider name starts with ``update.`` or equals
             ``itunes.apple.com``, call ``self.parse`` on the response (its
             return value is not used).
          4. Build an item loader, add ``source`` and ``source_link``, load
             the item and return it when ``is_item_valid`` accepts it;
             otherwise remove the app.

        Returns:
            The loaded item, or ``None`` when the response was rejected, no
            loader class is configured, the item is invalid, or an error
            occurred (errors are logged and reported, not raised).
        """
        meta = response.request.meta
        source = meta['domain']
        log_info('parse_item_1===========')
        url = response.request.url
        if self.sourcelinkprocessor_class:
            processor = self.sourcelinkprocessor_class()
            url = processor.process(url)

        if not self._process_response(response, source, LinkType.LEAF):
            # NOTE(review): unless this module defines its own ``type``, the
            # builtin is passed as the link type here; LinkType.LEAF may
            # have been intended -- confirm before changing.
            service.report_status([LinkStatus(meta['redirect_urls'][0], source, Status.FAIL, type)])
            market.remove_app(url, source)
            log_info('parse_item_2===========')
            return

        if not self.name.startswith('update.') and self.name != 'itunes.apple.com':
            self.parse(response)

        if source.endswith('hiapk.com'):
            # Swap '</br>' for '<p>' in the body before it is parsed.
            body = response.body.replace('</br>', '<p>')
            response = response.replace(body=body)

        if not self.itemloader_class:
            log_info('parse_item_3===========')
            return

        try:
            selector = HtmlXPathSelector(response)
            try:
                loader = self.itemloader_class(selector, response=response)
            except Exception:
                # Retry without the ``response`` keyword for loader classes
                # that cannot be constructed with it.
                loader = self.itemloader_class(selector)
            loader.add_value('source', source)
            loader.add_value('source_link', url)
        except Exception as e:
            log_info('parse_item_4===========\n%s' % e)
            log_error(e)
            failure = [LinkStatus(url, source, Status.FAIL, LinkType.UNKNOWN)]
            if self.name.startswith('update.'):
                service.report_update_status(failure)
            else:
                service.report_status(failure)
            # The failure has been reported and ``loader`` may not even be
            # bound, so stop here instead of falling through to load_item().
            return

        log_info('parse_item_5===========')
        try:
            item = loader.load_item()
            if self.is_item_valid(item):
                return item
            market.remove_app(url, source)
        except Exception as e:
            log_error(e)
Ejemplo n.º 20
0
def _thrift_call(func):
    """Open a Thrift connection to the link service and run ``func`` on it.

    Args:
        func: callable that receives the ``Links.Client`` instance.

    Returns:
        Whatever ``func`` returns, or ``None`` when a ``Thrift.TException``
        is raised (it is logged, not propagated). Other exceptions
        propagate to the caller.
    """
    transport = None
    try:
        sock = TSocket.TSocket(SERVICE_CONFIG['host'],
                               SERVICE_CONFIG['port'])
        transport = TTransport.TBufferedTransport(sock)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Links.Client(protocol)
        transport.open()
        return func(client)
    except Thrift.TException as tx:
        log_error(tx)
    finally:
        # ``transport`` is still None when setup failed before it was built;
        # closing unconditionally would raise and mask the original error.
        if transport is not None:
            transport.close()
Ejemplo n.º 21
0
 def _get_downloads(self, downloads):
     try:
         downloads = downloads.replace(u'\u4e0b\u8f7d', '').replace(
             u'\u5c0f\u4e8e', '').replace(u'\u5927\u4e8e', '').strip()
         if u'\u4e07\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u4e07\u6b21', '')) * 10000
         elif u'\u5343\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u5343\u6b21', '')) * 1000
         elif u'\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u6b21', ''))
     except Exception as e:
         log_error(e)
     return downloads
Ejemplo n.º 22
0
    def _get_downloads(self, download_link):
        try:
            if download_link:
                app_id = download_link.split(u'/')[-1]
                # have to pass some data to make it a POST request
                data = urllib.urlencode({'foo': 'bar'})
                response = urllib2.urlopen(self._download_post_url + app_id, data, timeout=15)
                result = response.read()
                return int(result)
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 23
0
def get_category(app_id):
    """Look up the category name stored for an app in ``itunes_game_cate``.

    Args:
        app_id: value matched against the ``app_id`` column.

    Returns:
        The stored ``cate_name``; a fixed default category name when no row
        matches; ``None`` when a ``MySQLdb.Error`` occurs (it is logged).
    """
    cursor = None
    try:
        cursor = _conn.cursor()
        sql = "SELECT cate_name FROM itunes_game_cate where app_id = %s"
        # DB-API parameters are a sequence (or mapping); a bare scalar is
        # not handled uniformly by every driver version.
        cursor.execute(sql, (app_id,))
        row = cursor.fetchone()
        if not row:
            return u'\u52a8\u4f5c\u6e38\u620f'
        return row[0]
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 24
0
def get_category(app_id):
    """Look up the category name stored for an app in ``itunes_game_cate``.

    Args:
        app_id: value matched against the ``app_id`` column.

    Returns:
        The stored ``cate_name``; a fixed default category name when no row
        matches; ``None`` when a ``MySQLdb.Error`` occurs (it is logged).
    """
    cursor = None
    try:
        cursor = _conn.cursor()
        sql = "SELECT cate_name FROM itunes_game_cate where app_id = %s"
        # DB-API parameters are a sequence (or mapping); a bare scalar is
        # not handled uniformly by every driver version.
        cursor.execute(sql, (app_id,))
        row = cursor.fetchone()
        if not row:
            return u'\u52a8\u4f5c\u6e38\u620f'
        return row[0]
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 25
0
    def _get_rating(self, rating):
        try:
            if rating:
                rating = float(rating)
                rating = int(round(rating * 10))
                if rating < 0:
                    return 0
                elif rating > 50:
                    return 50
                else:
                    return rating
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 26
0
    def _get_downloads(self, download_link):
        try:
            if download_link:
                app_id = download_link.split(u'/')[-1]
                # have to pass some data to make it a POST request
                data = urllib.urlencode({'foo': 'bar'})
                response = urllib2.urlopen(self._download_post_url + app_id,
                                           data,
                                           timeout=15)
                result = response.read()
                return int(result)
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 27
0
    def _get_rating(self, rating):
        try:
            if rating:
                rating = float(rating)
                rating = int(round(rating * 10))
                if rating < 0:
                    return 0
                elif rating > 50:
                    return 50
                else:
                    return rating
        except Exception as e:
            log_error(e)

        return 0
Ejemplo n.º 28
0
 def process_item(self, item, spider):
     """Validate the item with the spider, then persist it by type.

     When the spider has ``is_item_valid`` and it rejects the item (called
     with a second argument of 1), the item is dropped. Otherwise a
     ``CrawledItem`` goes to ``market.save_app`` and a
     ``DownloadLinkItem`` to ``market.save_download_link``; other types
     pass through unsaved.

     Raises:
         DropItem: for a rejected item or any error; the cause is printed,
             logged and its traceback printed before a message-less
             ``DropItem`` is raised.
     """
     log_info("Doing=============")
     try:
         rejected = (hasattr(spider, 'is_item_valid')
                     and not spider.is_item_valid(item, 1))
         if rejected:
             raise DropItem("invalid item: %s" % item)
         if isinstance(item, CrawledItem):
             market.save_app(item, spider.name)
         elif isinstance(item, DownloadLinkItem):
             market.save_download_link(item)
         return item
     except Exception as err:
         # Parenthesised single argument: still the plain print statement
         # on Python 2, with identical output.
         print("------------------------%s" % err)
         log_error(err)
         traceback.print_exc()
         raise DropItem()
Ejemplo n.º 29
0
 def process_item(self, item, spider):
     """Validate the item with the spider, then persist it by type.

     When the spider has ``is_item_valid`` and it rejects the item (called
     with a second argument of 1), the item is dropped. Otherwise a
     ``CrawledItem`` goes to ``market.save_app`` and a
     ``DownloadLinkItem`` to ``market.save_download_link``; other types
     pass through unsaved.

     Raises:
         DropItem: for a rejected item or any error; the cause is printed,
             logged and its traceback printed before a message-less
             ``DropItem`` is raised.
     """
     log_info("Doing=============")
     try:
         rejected = (hasattr(spider, 'is_item_valid')
                     and not spider.is_item_valid(item, 1))
         if rejected:
             raise DropItem("invalid item: %s" % item)
         if isinstance(item, CrawledItem):
             market.save_app(item, spider.name)
         elif isinstance(item, DownloadLinkItem):
             market.save_download_link(item)
         return item
     except Exception as err:
         # Parenthesised single argument: still the plain print statement
         # on Python 2, with identical output.
         print("------------------------%s" % err)
         log_error(err)
         traceback.print_exc()
         raise DropItem()
Ejemplo n.º 30
0
def save_app(item, name):
    """Persist a crawled app item, stamping it with the crawl time.

    ``item['last_crawl']`` is set from ``get_epoch_datetime()``. Items for
    ``play.google.com`` go through ``save_final_app``; all others are
    upserted into the table that ``_table_dic`` maps ``name`` to (``'app'``
    when unmapped). The transaction is then committed.

    Does nothing for a falsy ``item``. Any error is logged and swallowed.
    """
    if not item:
        return
    cursor = None
    try:
        cursor = _conn.cursor()

        # record last_crawl time
        item['last_crawl'] = get_epoch_datetime()
        if name == 'play.google.com':
            save_final_app(cursor, item)
        else:
            _upsert_item(cursor, item, _table_dic.get(name, 'app'))
        _conn.commit()
    except Exception as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed; closing it
        # blindly would raise from the finally block.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 31
0
def save_app(item, name):
    """Persist a crawled app item, stamping it with the crawl time.

    ``item['last_crawl']`` is set from ``get_epoch_datetime()``. Items for
    ``play.google.com`` go through ``save_final_app``; all others are
    upserted into the table that ``_table_dic`` maps ``name`` to (``'app'``
    when unmapped). The transaction is then committed.

    Does nothing for a falsy ``item``. Any error is logged and swallowed.
    """
    if not item:
        return
    cursor = None
    try:
        cursor = _conn.cursor()

        # record last_crawl time
        item['last_crawl'] = get_epoch_datetime()
        if name == 'play.google.com':
            save_final_app(cursor, item)
        else:
            _upsert_item(cursor, item, _table_dic.get(name, 'app'))
        _conn.commit()
    except Exception as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed; closing it
        # blindly would raise from the finally block.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 32
0
    def process_item(self, item, spider):
        """Queue the item's icon and image URLs via ``market.push_image_url``.

        Two records are pushed, in this order: one built from
        ``item['icon_link']`` tagged ``'icon'`` and one built from
        ``item['images']`` tagged ``'image'``. Each also carries the item's
        ``source_link``.

        Returns:
            The item, unchanged.

        Raises:
            DropItem: if building or pushing either record fails; the error
                is logged first.
        """
        try:
            icon_job = {
                'url': item['icon_link'],
                'source_link': item['source_link'],
                'source': 'icon',
            }
            market.push_image_url(icon_job)

            image_job = {
                'url': item['images'],
                'source_link': item['source_link'],
                'source': 'image',
            }
            market.push_image_url(image_job)
        except Exception as err:
            log_error(err)
            raise DropItem()
        return item
Ejemplo n.º 33
0
    def process_item(self, item, spider):
        """Queue the item's icon and image URLs via ``market.push_image_url``.

        Two records are pushed, in this order: one built from
        ``item['icon_link']`` tagged ``'icon'`` and one built from
        ``item['images']`` tagged ``'image'``. Each also carries the item's
        ``source_link``.

        Returns:
            The item, unchanged.

        Raises:
            DropItem: if building or pushing either record fails; the error
                is logged first.
        """
        try:
            icon_job = {
                'url': item['icon_link'],
                'source_link': item['source_link'],
                'source': 'icon',
            }
            market.push_image_url(icon_job)

            image_job = {
                'url': item['images'],
                'source_link': item['source_link'],
                'source': 'image',
            }
            market.push_image_url(image_job)
        except Exception as err:
            log_error(err)
            raise DropItem()
        return item
Ejemplo n.º 34
0
 def _get_downloads(self, downloads):
     try:
         downloads = downloads.replace(
             u'\u4e0b\u8f7d',
             '').replace(
             u'\u5c0f\u4e8e',
             '').replace(
             u'\u5927\u4e8e',
             '').strip(
         )
         if u'\u4e07\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u4e07\u6b21', '')) * 10000
         elif u'\u5343\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u5343\u6b21', '')) * 1000
         elif u'\u6b21' in downloads:
             downloads = int(downloads.replace(u'\u6b21', ''))
     except Exception as e:
         log_error(e)
     return downloads
Ejemplo n.º 35
0
def get_apple_id(app_id):
    """Return the Apple account to use for ``app_id`` and bump its usage.

    The ``apple_id`` recorded in ``app_itunes`` for the app is used when a
    row exists; otherwise the ``apple_account`` row with the lowest
    ``app_num`` is picked. In both cases ``app_num`` is incremented for
    that username and the change is committed.

    Returns:
        The account name, or ``None`` when no account row exists or a
        ``MySQLdb.Error`` occurs (both cases are logged).
    """
    cursor = None
    try:
        cursor = _conn.cursor()
        # DB-API parameters are a sequence (or mapping), hence the tuples.
        cursor.execute(
            "SELECT apple_id FROM app_itunes where app_id = %s", (app_id,))
        result = cursor.fetchone()
        if not result:
            # Reuse the open cursor instead of opening a second one whose
            # predecessor would never be closed.
            cursor.execute(
                "SELECT username FROM apple_account ORDER BY app_num ASC limit 1")
            result = cursor.fetchone()
        if not result:
            # Without this guard ``result[0]`` raises TypeError on None.
            log_error("no apple account available for app_id %s" % (app_id,))
            return None
        username = result[0]
        cursor.execute(
            "UPDATE apple_account set app_num = app_num + 1 WHERE username=%s",
            (username,))
        _conn.commit()
        return username
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 36
0
def get_apple_id(app_id):
    """Return the Apple account to use for ``app_id`` and bump its usage.

    The ``apple_id`` recorded in ``app_itunes`` for the app is used when a
    row exists; otherwise the ``apple_account`` row with the lowest
    ``app_num`` is picked. In both cases ``app_num`` is incremented for
    that username and the change is committed.

    Returns:
        The account name, or ``None`` when no account row exists or a
        ``MySQLdb.Error`` occurs (both cases are logged).
    """
    cursor = None
    try:
        cursor = _conn.cursor()
        # DB-API parameters are a sequence (or mapping), hence the tuples.
        cursor.execute(
            "SELECT apple_id FROM app_itunes where app_id = %s", (app_id,))
        result = cursor.fetchone()
        if not result:
            # Reuse the open cursor instead of opening a second one whose
            # predecessor would never be closed.
            cursor.execute(
                "SELECT username FROM apple_account ORDER BY app_num ASC limit 1")
            result = cursor.fetchone()
        if not result:
            # Without this guard ``result[0]`` raises TypeError on None.
            log_error("no apple account available for app_id %s" % (app_id,))
            return None
        username = result[0]
        cursor.execute(
            "UPDATE apple_account set app_num = app_num + 1 WHERE username=%s",
            (username,))
        _conn.commit()
        return username
    except MySQLdb.Error as e:
        log_error(e)
    finally:
        # The cursor is unset when _conn.cursor() itself failed.
        if cursor is not None:
            cursor.close()
Ejemplo n.º 37
0
 def _get_category(self, category):
     try:
         category = category.replace(u'\u7c7b\u522b:', '').strip()
     except Exception as e:
         log_error(e)
     return category
Ejemplo n.º 38
0
 def _get_category(self, category):
     try:
         category = category.replace(u'\u7c7b\u522b:', '').strip()
     except Exception as e:
         log_error(e)
     return category