Beispiel #1
0
class WeiboItem(scrapy.Item):
    """Scrapy item declaring the fields scraped for a 'weibos' record.

    ``collection`` is a plain class attribute, not a ``Field``, so it is not
    part of the item's data. Presumably it names the storage collection used
    by a pipeline -- confirm against the pipeline code.
    """
    collection = 'weibos'
    id = Field()
    # Engagement counters (names suggest likes, comments and reposts).
    attitudes_count = Field()
    comments_count = Field()
    reposts_count = Field()
    source = Field()
    pictures = Field()
    text = Field()
    textLength = Field()
    thumbnail = Field()
    user = Field()
    # Timestamps: when the record was created and when it was crawled
    # (format not visible here -- verify against the spider).
    created_at = Field()
    crawled_at = Field()
Beispiel #2
0
class TaiwanselectItem(Item):
    """Item with three declared fields: ``title``, ``time`` and ``content``."""
    title = Field()
    time = Field()
    content = Field()
class User(Item):
    """Item holding the basic profile fields of a user."""
    ID = Field()  # user id
    name = Field()  # user name
    screen_name = Field()  # user screen name
    avatar = Field()  # avatar URL
Beispiel #4
0
class BeikeItem(Item):
    """Item declaring five ``house_*`` / ``sale_status`` listing fields.

    Field names suggest one record per housing listing -- confirm against
    the spider that fills it.
    """
    house_name = Field()
    sale_status = Field()
    house_type = Field()
    house_address = Field()
    house_price = Field()
Beispiel #5
0
class CommentItem(Item):
    """Item declaring the fields of a single review/comment record.

    ``table_name`` is a plain class attribute, not a ``Field``. Presumably it
    is read by a storage pipeline to pick the target table -- confirm.
    """
    table_name = 'commentTable'
    restaurant = Field()
    rest_url = Field()
    review_id = Field()
    user_id = Field()
    score = Field()
    date = Field()
    comment = Field()
    # Reaction fields (names suggest per-review vote counts -- verify).
    useful = Field()
    funny = Field()
    cool = Field()
    label = Field()
Beispiel #6
0
class RaceItem(Item):
    """Item declaring per-year race fields.

    Besides the year and race-map fields, the item carries four parallel
    groups (``winner_*``, ``climber_*``, ``sprinter_*``, ``young_*``), each
    with ``url``, ``photo``, ``flag`` and ``name`` fields.
    """
    year = Field()
    year_url = Field()
    # NOTE: winner_url is declared ahead of the race-map fields, apart from
    # the other winner_* fields below.
    winner_url = Field()
    race_map = Field()
    src_racemap = Field()
    # winner_* group
    winner_photo = Field()
    winner_flag = Field()
    winner_name = Field()
    # climber_* group
    climber_url = Field()
    climber_photo = Field()
    climber_flag = Field()
    climber_name = Field()
    # sprinter_* group
    sprinter_url = Field()
    sprinter_photo = Field()
    sprinter_flag = Field()
    sprinter_name = Field()
    # young_* group
    young_url = Field()
    young_photo = Field()
    young_flag = Field()
    young_name = Field()
Beispiel #7
0
class AcersrentkolItem(scrapy.Item):
    """Scrapy item declaring the fields of a property listing record.

    Field-name casing is mixed (e.g. ``Building_name``, ``Status``,
    ``Monthly_Rent``). The names are the item's keys, so they must stay
    exactly as written for any code that reads or writes them.
    """
    carpet_area = Field()
    updated_date = Field() 
    management_by_landlord = Field()
    areacode = Field()
    mobile_lister = Field()
    google_place_id = Field()
    # Disabled field: immediate_possession = Field()
    age = Field()
    address = Field()
    price_on_req = Field()
    sublocality = Field()
    config_type = Field()
    platform = Field()
    city = Field()
    listing_date = Field()
    txn_type = Field()
    property_type = Field()
    Building_name = Field()
    lat = Field()
    longt = Field()
    locality = Field()
    # Disabled field: sqft = Field()
    Status = Field()
    listing_by = Field() 
    name_lister = Field()
    Selling_price = Field()
    Monthly_Rent = Field()
    details = Field()
    data_id = Field()
    Possession = Field()
    Launch_date = Field()
    price_per_sqft = Field()
    Bua_sqft = Field()
    # Four numbered quality slots; their meaning is not visible here.
    quality1 = Field()
    quality2 = Field()
    quality3 = Field()
    quality4 = Field()
    scraped_time = Field()
Beispiel #8
0
class BaseItem(Item):
    """Base item declaring the company and job fields shared by job items."""
    company_href = Field()  # company link
    company_name = Field()  # company name
    companysize_text = Field()  # company size
    industryField = Field()  # company line of business
    experience = Field()  # work experience
    issuedate = Field()  # update time
    job_href = Field()  # job link
    job_name = Field()  # job title
    job_info = Field()  # job requirements
    job_keyword = Field()  # keywords
    providesalary_text = Field()  # salary
    workarea_text = Field()  # work location
    crawl_time = Field()  # crawl time
Beispiel #9
0
class MainPostItem(Item):
    """Main (topic) post object."""

    # id of the forum the main post belongs to
    main_post_forum_id = Field()
    # name of the forum the main post belongs to
    main_post_forum_name = Field()
    # main post id
    main_post_id = Field()
    # first id of the main post
    main_post_first_id = Field()
    # main post topic flag
    main_post_topic_flag = Field()
    # main post pinned (sticky) flag
    main_post_top_flag = Field()
    # main post featured ("good") flag
    main_post_good_flag = Field()
    # main post URL
    main_post_url = Field()
    # main post title
    main_post_title = Field()
    # main post reply count (number of reply posts)
    main_post_reply_count = Field()
    # main post content
    main_post_content = Field()
    # main post comment count
    main_post_comment_num = Field()
    # main post author information
    post_author_infor = Field()
Beispiel #10
0
class MatplotlibItem(scrapy.Item):
    """Scrapy item with ``file_urls`` and ``files`` fields.

    NOTE(review): these names match the default input/output fields of
    Scrapy's FilesPipeline; confirm that pipeline is enabled in settings.
    """
    file_urls = Field()
    files = Field()
Beispiel #11
0
class Job51Item(BaseItem):
    """Extends ``BaseItem`` with two additional company fields."""
    companytype_text = Field()  # company type
    jobwelf = Field()  # company benefits
Beispiel #12
0
class ReviewItem(Item):
    """Item declaring five fields describing a review and its reviewer."""
    name = Field()
    rank = Field()
    reviewer = Field()
    email = Field()
    country = Field()
Beispiel #13
0
class AmazonspiderItem(Item):
    """Item with ``client_url``, ``asins`` and ``error_message`` fields.

    Presumably ``asins`` holds the ASINs found for ``client_url`` and
    ``error_message`` is set when scraping fails -- confirm in the spider.
    """
    client_url = Field()
    asins = Field()
    error_message = Field()
Beispiel #14
0
class JobItem(Item):
    """Item declaring the fields of a single job posting.

    The fields fall into three groups by name: the posting itself, the
    hiring company (``company_*``), and scrape metadata.
    """
    # Posting
    url = Field()
    name = Field()
    location = Field()
    salary = Field()
    pub_date = Field()
    available_number = Field()
    job_category = Field()
    exp = Field()
    edu = Field()
    duty = Field()
    demand = Field()
    welfare = Field()
    work_time = Field()
    address = Field()
    # Company
    company_name = Field()
    company_scale = Field()
    company_type = Field()
    company_industry = Field()
    # Scrape metadata
    from_website = Field()
    scrape_time = Field()
Beispiel #15
0
class InformationItem(Item):
    """Personal information of a user."""
    _id = Field()  # user ID
    NickName = Field()  # nickname
    Gender = Field()  # gender
    Province = Field()  # province
    City = Field()  # city
    BriefIntroduction = Field()  # brief introduction
    Autograph = Field()  # signature
    Birthday = Field()  # birthday
    Num_Tweets = Field()  # number of weibo posts
    Num_Follows = Field()  # number of accounts followed
    Num_Fans = Field()  # number of fans (followers)
    SexOrientation = Field()  # sexual orientation
    Sentiment = Field()  # relationship status
    VIPlevel = Field()  # VIP (membership) level
    Authentication = Field()  # verification
    URL = Field()  # homepage link
Beispiel #16
0
class CompanyIdItem(Item):
    """Item carrying a single ``_id`` field."""
    _id = Field()
Beispiel #17
0
class RelationshipsItem(Item):
    """User relationship; only the "follows" relationship is kept."""
    Host1 = Field()  # presumably the ID of the following user -- confirm
    Host2 = Field()  # ID of the user being followed
Beispiel #18
0
class ImageItem(Item):
    """Item with ``image_urls``, ``images`` and ``image_paths`` fields.

    NOTE(review): ``image_urls`` / ``images`` match the default field names
    of Scrapy's ImagesPipeline; ``image_paths`` is an extra field that is
    presumably filled by a custom pipeline -- confirm.
    """
    image_urls = Field()
    images = Field()
    image_paths = Field()
Beispiel #19
0
class TopicItem(Item):
    """Item declaring the fields of a forum/travel topic entry."""
    type = Field()
    title = Field()
    link = Field()
    writer = Field()
    post_time = Field()
    profile_link = Field()
    departure_time = Field()
    return_time = Field()
    destination = Field()
    # Engagement counters.
    views = Field()
    replies = Field()
    likes = Field()

    def istopic(self) -> bool:
        """Return ``True`` unconditionally.

        Presumably lets callers tell topic items apart from other item
        types -- confirm against the code that calls it.
        """
        return True
Beispiel #20
0
class Restaurant(Item):
    """Item declaring the fields of a restaurant record.

    The type hints in the comments below are the original author's tentative
    notes; no types are enforced by the item itself.
    """

    id = Field()
    name = Field()
    address = Field()
    link = Field()
    rating = Field()
    liked = Field()

    phone_number = Field()

    avg_cost = Field()

    # Possibly an enum: cash, card, or both.
    bill_acceptance = Field()

    # Either a list or a str.
    zone = Field()

    been_there_count = Field()

    # Possibly a dict covering Monday through Sunday.
    opening_hours = Field()
    # List; a restaurant may appear in multiple collections.
    collection = Field()

    # List; may hold multiple restaurant types (e.g. casual dining, bar).
    restaurant_type = Field()
    # List; may hold multiple cuisine types (e.g. Italian, bar food).
    cuisine_type = Field()

    # At most 10 pictures.
    # Folder address.
    food_image_links = Field()

    # List.
    menu_img_links = Field()
Beispiel #21
0
class UserItem(Item):
    """Item declaring the profile fields of a user.

    Fields are grouped into identity/profile data, ``*_count`` counters, and
    three fields with plural names (``locations``, ``educations``,
    ``employments``) that presumably hold collections -- confirm.
    """
    # Identity and basic profile.
    id = Field()
    url_token = Field()
    name = Field()
    avatar_url = Field()
    headline = Field()
    description = Field()
    url = Field()

    gender = Field()
    cover_url = Field()
    type = Field()
    badge = Field()

    # Counters (every name ends in ``_count``).
    answer_count = Field()
    articles_count = Field()
    commercial_question_count = Field()
    favorite_count = Field()
    favorited_count = Field()
    follower_count = Field()
    following_columns_count = Field()
    following_count = Field()
    pins_count = Field()
    question_count = Field()
    thank_from_count = Field()
    thank_to_count = Field()
    thanked_count = Field()
    vote_from_count = Field()
    vote_to_count = Field()
    voteup_count = Field()
    following_favlists_count = Field()
    following_question_count = Field()
    following_topic_count = Field()
    marked_answers_count = Field()
    mutual_followees_count = Field()
    hosted_live_count = Field()
    participated_live_count = Field()

    # Background information.
    locations = Field()
    educations = Field()
    employments = Field()
class StackItem(Item):
    """Item with ``domain``, ``title`` and ``url`` fields."""
    domain = Field()
    title = Field()
    url = Field()
Beispiel #23
0
class DoubanItem(scrapy.Item):
    """Scrapy item declaring the fields of a movie record.

    The type notes in the trailing comments are the original author's
    annotations; the item itself does not enforce them.
    """
    movieId = Field()  # movie id
    movieUrl = Field()  # movie URL
    title = Field()  # movie title; string
    ratingValue = Field()  # rating; string
    ratingPeople = Field()  # number of raters; int
    directors = Field()  # directors; list
    casts = Field()  # screenwriters; list
    coverUrl = Field()  # cover image URL; string
    actors = Field()  # lead actors; list
    category = Field()  # top-level category (film/TV)
    tags = Field()  # second-level categories; list
    country = Field()  # producing country/region
    movieLanguage = Field()  # language
    pubDate = Field()  # release dates; array, since some movies have separate release dates for China and other countries
    runtime = Field()  # movie runtime; int, seconds
    titleOthers = Field()  # alternative titles
    IMDbUrl = Field()  # IMDb link
    introduction = Field()  # synopsis
    won = Field()  # awards
    comments = Field()  # reviews: author + review link
    etc = Field()  # other fields; dict
Beispiel #24
0
class Cai500Item(Item):
    """Item declaring the fields of a single match record.

    Field names suggest football match data with team rankings, points and
    odds -- confirm against the spider that fills it.
    """
    query_date = Field()
    match_name = Field()
    turn = Field()
    match_time = Field()
    both_sides = Field()
    score = Field()
    league_table = Field()
    # Home/guest pairs.
    home_team_rank = Field()
    guest_team_rank = Field()
    home_team_points = Field()
    guest_team_points = Field()
    odds = Field()
Beispiel #25
0
class UserItem(Item):
    """Item declaring the fields of a reviewer/user profile.

    ``table_name`` is a plain class attribute, not a ``Field``. Presumably it
    is read by a storage pipeline to pick the target table -- confirm.
    """
    table_name = 'userTable'
    user_avatar = Field()
    user_url = Field()
    user_id = Field()
    user_name = Field()
    data_hovercard_id = Field()
    user_location = Field()
    friends = Field()
    revs = Field()
    photos = Field()
    # countFive .. countOne: presumably the number of ratings given at each
    # star level -- verify against the spider.
    countFive = Field()
    countFour = Field()
    countThree = Field()
    countTwo = Field()
    countOne = Field()
    lastDate = Field()
Beispiel #26
0
class DetailLinkItem(scrapy.Item):
    """Scrapy item carrying a single ``href`` field."""
    href = Field()
Beispiel #27
0
class WebmdItem(Item):
    """Item declaring drug-information and review fields.

    Field names are capitalized; they are the item's keys and must stay
    exactly as written for any code that reads or writes them.
    """
    Condition = Field()
    Drug = Field()
    Indication = Field()
    Type = Field()
    Use = Field()
    HowtoUse = Field()
    Sides = Field()
    Precautions = Field()
    Interactions = Field()
    BrandName = Field()
    GenName = Field()
    AvoidUse = Field()
    Allergies = Field()
    DrugId = Field()
    # Review fields (names suggest aggregate ratings plus review content --
    # verify).
    NumReviews = Field()
    Effectiveness = Field()
    EaseofUse = Field()
    Satisfaction = Field()
    Reviews = Field()
Beispiel #28
0
class DetailItem(scrapy.Item):
    """Scrapy item declaring the detail fields of a vehicle listing.

    Fields are grouped into crawl metadata, listing summary, and the
    detailed specification/equipment list.
    """

    crawled = Field()       # crawl time
    spider = Field()        # spider name
    href = Field()          # source

    title = Field()         # title
    mileage = Field()       # odometer mileage
    emission = Field()      # emission standard
    gearbox = Field()       # gearbox
    transfers = Field()     # number of ownership transfers
    region = Field()        # city - parsed from the URL

    manufacturer = Field()  # manufacturer
    level = Field()         # vehicle class
    engine = Field()        # engine
    gearbox_d = Field()     # gearbox details
    struction = Field()     # body structure
    LWH = Field()           # length / width / height
    wheelbase = Field()     # wheelbase
    luggage = Field()       # luggage compartment capacity
    curb_weight = Field()   # curb weight
    displacement = Field()  # displacement
    intake_form = Field()   # intake type
    cylinders = Field()     # number of cylinders
    horsepower = Field()    # maximum horsepower
    torque = Field()        # maximum torque
    fuel = Field()          # fuel type
    fuel_label = Field()    # fuel grade
    fuel_supply = Field()   # fuel supply method
    drive_method = Field()  # drive type
    assistance = Field()    # power assistance type
    front_suspension = Field()   # front suspension type
    back_suspension = Field()    # rear suspension type
    front_brake = Field()   # front brake type
    back_brake = Field()    # rear brake type
    driving_brake = Field() # driving brake type (possibly parking brake -- verify)
    front_tire = Field()    # front tire type
    back_tire = Field()     # rear tire type
    front_airbag = Field()  # driver / front passenger airbags
    side_airbag = Field()   # front / rear side airbags
    head_airbag = Field()   # front / rear head airbags
    tire_pressure = Field() # tire pressure monitoring
    locking = Field()       # central locking
    child_seat = Field()    # child seat anchors
    keyless = Field()       # keyless start
    abs = Field()           # anti-lock braking system (ABS)
    esp = Field()           # electronic stability control (ESP)
    sunroof = Field()       # power sunroof
    skylight = Field()      # panoramic sunroof
    suction_door = Field()  # power soft-close doors
    induction_trunk = Field()   # sensor-operated trunk
    induction_wiper = Field()   # rain-sensing wipers
    back_wiper = Field()    # rear wiper
    windows = Field()       # front / rear power windows
    mirror_elec = Field()   # power-adjustable mirrors
    mirror_heat = Field()   # heated mirrors
    steering_wheel = Field()    # multifunction steering wheel
    cruise = Field()        # cruise control
    air_cond = Field()      # independent rear air conditioning
    air_control = Field()   # air conditioning control mode
    gps = Field()           # GPS navigation
    radar = Field()         # reversing radar
    image = Field()         # reversing camera system
    leather = Field()       # leather seats
    seat_heat = Field()     # front / rear seat heating
class Tweet(Item):
    """Item declaring the fields of a single tweet.

    Covers identity and content, engagement counters, reply/retweet flags,
    and attached images, videos and other media.
    """

    # --- identity and content ---
    # Tweet id.
    ID = Field()
    # Tweet URL.
    url = Field()
    # Post time.
    datetime = Field()
    # Text content.
    text = Field()
    # Id of the posting user.
    user_id = Field()
    # Username shown on the tweet.
    usernameTweet = Field()

    # --- engagement counters ---
    # Number of retweets.
    nbr_retweet = Field()
    # Number of favorites.
    nbr_favorite = Field()
    # Number of replies.
    nbr_reply = Field()

    # --- flags ---
    # Boolean: whether the tweet is a reply.
    is_reply = Field()
    # Boolean: whether the tweet is just a retweet of another tweet.
    is_retweet = Field()

    # --- images ---
    # True/False: whether the tweet contains images.
    has_image = Field()
    # List of image URLs; empty if none.
    images = Field()

    # --- videos ---
    # True/False: whether the tweet contains videos.
    has_video = Field()
    # List of video URLs.
    videos = Field()

    # --- other media ---
    # True/False: whether the tweet contains media (e.g. a summary).
    has_media = Field()
    # List of media.
    medias = Field()
Beispiel #30
0
class UserRelationItem(scrapy.Item):
    """Scrapy item declaring a user's ``follows`` and ``fans`` fields.

    ``collection`` is a plain class attribute, not a ``Field``. Presumably it
    names the storage collection used by a pipeline -- confirm.
    """
    collection = 'users'
    id = Field()
    follows = Field()
    fans = Field()