class WeiboItem(scrapy.Item):
    """Item holding the fields collected for one weibo entry."""

    # Plain class attribute (not a Field). Presumably the name of the storage
    # collection used by a pipeline -- confirm against the consumer.
    collection = 'weibos'

    # Identifier
    id = Field()

    # Interaction counters
    attitudes_count = Field()
    comments_count = Field()
    reposts_count = Field()

    # Content
    source = Field()
    pictures = Field()
    text = Field()
    textLength = Field()
    thumbnail = Field()

    # Author and timestamps
    user = Field()
    created_at = Field()
    crawled_at = Field()
class TaiwanselectItem(Item):
    """Item with three fields: ``title``, ``time`` and ``content``."""

    title = Field()
    time = Field()
    content = Field()
class User(Item):
    """Item describing a user account."""

    ID = Field()           # user id
    name = Field()         # user name
    screen_name = Field()  # user screen name
    avatar = Field()       # avatar url
class BeikeItem(Item):
    """Item holding the ``house_*`` / ``sale_status`` fields of one listing."""

    house_name = Field()
    sale_status = Field()
    house_type = Field()
    house_address = Field()
    house_price = Field()
class CommentItem(Item):
    """Item holding the fields of one restaurant review comment."""

    # Plain class attribute (not a Field). Presumably the destination table
    # name used when persisting -- confirm against the pipeline.
    table_name = 'commentTable'

    # What was reviewed
    restaurant = Field()
    rest_url = Field()

    # Review identity and content
    review_id = Field()
    user_id = Field()
    score = Field()
    date = Field()
    comment = Field()

    # Reaction counters and label
    useful = Field()
    funny = Field()
    cool = Field()
    label = Field()
class RaceItem(Item):
    """Item for one race edition.

    Besides the year and map fields, it carries a url/photo/flag/name group
    for each of four categories: winner, climber, sprinter and young.
    """

    # Edition
    year = Field()
    year_url = Field()
    winner_url = Field()
    race_map = Field()
    src_racemap = Field()

    # Winner
    winner_photo = Field()
    winner_flag = Field()
    winner_name = Field()

    # Climber
    climber_url = Field()
    climber_photo = Field()
    climber_flag = Field()
    climber_name = Field()

    # Sprinter
    sprinter_url = Field()
    sprinter_photo = Field()
    sprinter_flag = Field()
    sprinter_name = Field()

    # Young rider
    young_url = Field()
    young_photo = Field()
    young_flag = Field()
    young_name = Field()
class AcersrentkolItem(scrapy.Item):
    """Item holding the fields of one property listing.

    Field names are kept exactly as declared (including mixed casing such as
    ``Building_name`` and ``Status``) because consumers look them up by name.
    """

    carpet_area = Field()
    updated_date = Field()
    management_by_landlord = Field()
    areacode = Field()
    mobile_lister = Field()
    google_place_id = Field()
    # immediate_possession = Field()
    age = Field()
    address = Field()
    price_on_req = Field()
    sublocality = Field()
    config_type = Field()
    platform = Field()
    city = Field()
    listing_date = Field()
    txn_type = Field()
    property_type = Field()
    Building_name = Field()
    lat = Field()
    longt = Field()
    locality = Field()
    # sqft = Field()
    Status = Field()
    listing_by = Field()
    name_lister = Field()
    Selling_price = Field()
    Monthly_Rent = Field()
    details = Field()
    data_id = Field()
    Possession = Field()
    Launch_date = Field()
    price_per_sqft = Field()
    Bua_sqft = Field()
    quality1 = Field()
    quality2 = Field()
    quality3 = Field()
    quality4 = Field()
    scraped_time = Field()
class BaseItem(Item):
    """Base item with the company and job fields of one job posting."""

    # Company
    company_href = Field()        # company link
    company_name = Field()        # company name
    companysize_text = Field()    # company size
    industryField = Field()       # company business / industry

    # Job
    experience = Field()          # work experience
    issuedate = Field()           # update time
    job_href = Field()            # job link
    job_name = Field()            # job name
    job_info = Field()            # job requirements
    job_keyword = Field()         # keywords
    providesalary_text = Field()  # salary
    workarea_text = Field()       # work location

    # Bookkeeping
    crawl_time = Field()          # crawl time
class MainPostItem(Item):
    """Main (topic) post object."""

    main_post_forum_id = Field()     # id of the forum containing the main post
    main_post_forum_name = Field()   # name of the forum containing the main post
    main_post_id = Field()           # main post id
    main_post_first_id = Field()     # first id of the main post
    main_post_topic_flag = Field()   # topic flag of the main post
    main_post_top_flag = Field()     # pinned-to-top flag of the main post
    main_post_good_flag = Field()    # featured ("good") flag of the main post
    main_post_url = Field()          # main post link
    main_post_title = Field()        # main post title
    main_post_reply_count = Field()  # reply count (number of reply posts)
    main_post_content = Field()      # main post content
    main_post_comment_num = Field()  # number of comments on the main post
    post_author_infor = Field()      # main post author information
class MatplotlibItem(scrapy.Item):
    """Item with a ``file_urls`` / ``files`` field pair.

    These are the default field names of Scrapy's FilesPipeline (URLs in,
    download results out). Whether that pipeline is enabled is not visible
    here -- confirm in the project settings.
    """

    file_urls = Field()
    files = Field()
class Job51Item(BaseItem):
    """``BaseItem`` extended with two extra company fields."""

    companytype_text = Field()  # company type
    jobwelf = Field()           # company benefits
class ReviewItem(Item):
    """Item holding the fields of one review record."""

    name = Field()
    rank = Field()
    reviewer = Field()
    email = Field()
    country = Field()
class AmazonspiderItem(Item):
    """Item with ``client_url``, ``asins`` and ``error_message`` fields."""

    client_url = Field()
    asins = Field()
    error_message = Field()
class JobItem(Item):
    """Item holding the fields of one job posting."""

    # Posting basics
    url = Field()
    name = Field()
    location = Field()
    salary = Field()
    pub_date = Field()
    available_number = Field()
    job_category = Field()

    # Requirements and description
    exp = Field()
    edu = Field()
    duty = Field()
    demand = Field()
    welfare = Field()
    work_time = Field()
    address = Field()

    # Company
    company_name = Field()
    company_scale = Field()
    company_type = Field()
    company_industry = Field()

    # Provenance
    from_website = Field()
    scrape_time = Field()
class InformationItem(Item):
    """Personal information."""

    _id = Field()                # user ID
    NickName = Field()           # nickname
    Gender = Field()             # gender
    Province = Field()           # province
    City = Field()               # city
    BriefIntroduction = Field()  # brief introduction
    Autograph = Field()          # signature
    Birthday = Field()           # birthday
    Num_Tweets = Field()         # number of weibo posts
    Num_Follows = Field()        # number of accounts followed
    Num_Fans = Field()           # number of fans (followers)
    SexOrientation = Field()     # sexual orientation
    Sentiment = Field()          # relationship status
    VIPlevel = Field()           # membership level
    Authentication = Field()     # verification
    URL = Field()                # homepage link
class CompanyIdItem(Item):
    """Item whose only field is ``_id``."""

    _id = Field()
class RelationshipsItem(Item):
    """User relationship; only the "follows" relation is kept."""

    Host1 = Field()
    Host2 = Field()  # ID of the user being followed
class ImageItem(Item):
    """Item with ``image_urls``, ``images`` and ``image_paths`` fields.

    ``image_urls`` / ``images`` are the default field names of Scrapy's
    ImagesPipeline. Whether that pipeline is enabled, and what fills
    ``image_paths``, is not visible here -- confirm in the project.
    """

    image_urls = Field()
    images = Field()
    image_paths = Field()
class TopicItem(Item):
    """Item holding the fields of one topic entry."""

    # Topic
    type = Field()
    title = Field()
    link = Field()

    # Author and posting
    writer = Field()
    post_time = Field()
    profile_link = Field()

    # Trip details
    departure_time = Field()
    return_time = Field()
    destination = Field()

    # Engagement counters
    views = Field()
    replies = Field()
    likes = Field()

    def istopic(self):
        """Return ``True`` unconditionally."""
        return True
class Restaurant(Item):
    """Item holding the fields of one restaurant.

    The value shapes noted beside some fields come from the original author's
    comments and are tentative ("maybe"), not enforced by this class.
    """

    id = Field()
    name = Field()
    address = Field()
    link = Field()
    rating = Field()
    liked = Field()
    phone_number = Field()
    avg_cost = Field()
    bill_acceptance = Field()   # enum maybe: cash, card or both
    zone = Field()              # list maybe, or str
    been_there_count = Field()
    opening_hours = Field()     # dict maybe, from monday to sunday
    collection = Field()        # list; may be in multiple collections
    restaurant_type = Field()   # list; may have several (casual dining, bar)
    cuisine_type = Field()      # list; may have several (Italian, barfood)
    food_image_links = Field()  # max 10 pics; folder address
    menu_img_links = Field()    # list
class UserItem(Item):
    """Item holding the profile fields and counters of one user.

    NOTE(review): a second class named ``UserItem`` is defined later in this
    file; if both live in the same module the later one rebinds the name.
    """

    # Profile
    id = Field()
    url_token = Field()
    name = Field()
    avatar_url = Field()
    headline = Field()
    description = Field()
    url = Field()
    gender = Field()
    cover_url = Field()
    type = Field()
    badge = Field()

    # Activity counters
    answer_count = Field()
    articles_count = Field()
    commercial_question_count = Field()
    favorite_count = Field()
    favorited_count = Field()
    follower_count = Field()
    following_columns_count = Field()
    following_count = Field()
    pins_count = Field()
    question_count = Field()
    thank_from_count = Field()
    thank_to_count = Field()
    thanked_count = Field()
    vote_from_count = Field()
    vote_to_count = Field()
    voteup_count = Field()
    following_favlists_count = Field()
    following_question_count = Field()
    following_topic_count = Field()
    marked_answers_count = Field()
    mutual_followees_count = Field()
    hosted_live_count = Field()
    participated_live_count = Field()

    # Background
    locations = Field()
    educations = Field()
    employments = Field()
class StackItem(Item):
    """Item with ``domain``, ``title`` and ``url`` fields."""

    domain = Field()
    title = Field()
    url = Field()
class DoubanItem(scrapy.Item):
    """Item holding the fields of one movie.

    The value types noted beside the fields come from the original author's
    comments; this class does not enforce them.
    """

    movieId = Field()        # movie id
    movieUrl = Field()       # movie link
    title = Field()          # movie title, string
    ratingValue = Field()    # rating, string
    ratingPeople = Field()   # number of raters, int
    directors = Field()      # directors, list
    # NOTE(review): the original comment labels this "screenwriters", although
    # the field is named ``casts`` -- confirm which one the spider stores.
    casts = Field()          # screenwriters, list
    coverUrl = Field()       # cover image address, string
    actors = Field()         # lead actors, list
    category = Field()       # top-level category: film/TV
    tags = Field()           # second-level categories, list
    country = Field()        # country/region of production
    movieLanguage = Field()  # language
    # An array, because some movies have release dates for China as well as
    # for other countries.
    pubDate = Field()        # release date(s)
    runtime = Field()        # running time, int, in seconds
    titleOthers = Field()    # also known as
    IMDbUrl = Field()        # IMDb link
    introduction = Field()   # synopsis
    won = Field()            # awards
    comments = Field()       # reviews: author + review link
    etc = Field()            # other fields, dict
class Cai500Item(Item):
    """Item holding the fields of one match record."""

    # Match identification
    query_date = Field()
    match_name = Field()
    turn = Field()
    match_time = Field()
    both_sides = Field()
    score = Field()

    # Standings
    league_table = Field()
    home_team_rank = Field()
    guest_team_rank = Field()
    home_team_points = Field()
    guest_team_points = Field()

    # Betting
    odds = Field()
class UserItem(Item):
    """Item holding the profile and review statistics of one reviewer.

    NOTE(review): another class named ``UserItem`` is defined earlier in this
    file; if both live in the same module this definition rebinds the name.
    """

    # Plain class attribute (not a Field). Presumably the destination table
    # name used when persisting -- confirm against the pipeline.
    table_name = 'userTable'

    # Profile
    user_avatar = Field()
    user_url = Field()
    user_id = Field()
    user_name = Field()
    data_hovercard_id = Field()
    user_location = Field()

    # Totals
    friends = Field()
    revs = Field()
    photos = Field()

    # Per-score counters (five down to one)
    countFive = Field()
    countFour = Field()
    countThree = Field()
    countTwo = Field()
    countOne = Field()

    lastDate = Field()
class DetailLinkItem(scrapy.Item):
    """Item whose only field is ``href``."""

    href = Field()
class WebmdItem(Item):
    """Item holding the drug description and review fields of one entry."""

    # Drug and condition
    Condition = Field()
    Drug = Field()
    Indication = Field()
    Type = Field()

    # Usage and safety
    Use = Field()
    HowtoUse = Field()
    Sides = Field()
    Precautions = Field()
    Interactions = Field()

    # Naming and identification
    BrandName = Field()
    GenName = Field()
    AvoidUse = Field()
    Allergies = Field()
    DrugId = Field()

    # Review statistics
    NumReviews = Field()
    Effectiveness = Field()
    EaseofUse = Field()
    Satisfaction = Field()
    Reviews = Field()
class DetailItem(scrapy.Item):
    """Item holding the detail fields of one vehicle listing."""

    # --- Crawl bookkeeping ---
    crawled = Field()           # crawl time
    spider = Field()            # spider name
    href = Field()              # source

    # --- Listing summary ---
    title = Field()             # title
    mileage = Field()           # odometer mileage
    emission = Field()          # emission standard
    gearbox = Field()           # gearbox
    transfers = Field()         # number of ownership transfers
    region = Field()            # city - obtained by parsing the url

    # --- Basic specification ---
    manufacturer = Field()      # manufacturer
    level = Field()             # class / level
    engine = Field()            # engine
    gearbox_d = Field()         # gearbox details
    struction = Field()         # body structure
    LWH = Field()               # length / width / height
    wheelbase = Field()         # wheelbase
    luggage = Field()           # luggage compartment volume
    curb_weight = Field()       # curb weight

    # --- Engine ---
    displacement = Field()      # displacement
    intake_form = Field()       # air intake type
    cylinders = Field()         # number of cylinders
    horsepower = Field()        # maximum horsepower
    torque = Field()            # maximum torque
    fuel = Field()              # fuel type
    fuel_label = Field()        # fuel grade
    fuel_supply = Field()       # fuel supply method

    # --- Chassis, brakes and tires ---
    drive_method = Field()      # drive type
    assistance = Field()        # power steering type
    front_suspension = Field()  # front suspension type
    back_suspension = Field()   # rear suspension type
    front_brake = Field()       # front brake type
    back_brake = Field()        # rear brake type
    # NOTE(review): the original comment's wording looks like a typo for
    # "parking brake" -- confirm against the scraped page.
    driving_brake = Field()     # parking brake type (presumably)
    front_tire = Field()        # front tire type
    back_tire = Field()         # rear tire type

    # --- Safety ---
    front_airbag = Field()      # driver / front passenger airbags
    side_airbag = Field()       # front / rear side airbags
    head_airbag = Field()       # front / rear head airbags
    tire_pressure = Field()     # tire pressure monitoring
    locking = Field()           # central locking
    child_seat = Field()        # child seat anchor points
    keyless = Field()           # keyless start
    abs = Field()               # anti-lock braking system (ABS)
    esp = Field()               # electronic stability control (ESP)

    # --- Exterior and convenience ---
    sunroof = Field()           # power sunroof
    skylight = Field()          # panoramic sunroof
    suction_door = Field()      # power soft-close doors
    induction_trunk = Field()   # sensor-operated trunk
    induction_wiper = Field()   # rain-sensing wipers
    back_wiper = Field()        # rear wiper
    windows = Field()           # front / rear power windows
    mirror_elec = Field()       # power-adjustable mirrors
    mirror_heat = Field()       # heated mirrors

    # --- Interior and equipment ---
    steering_wheel = Field()    # multifunction steering wheel
    cruise = Field()            # cruise control
    air_cond = Field()          # independent rear air conditioning
    air_control = Field()       # air conditioning control mode
    gps = Field()               # GPS navigation
    radar = Field()             # reversing radar
    image = Field()             # reversing camera system
    leather = Field()           # leather seats
    seat_heat = Field()         # front / rear heated seats
class Tweet(Item):
    """Item holding the fields of one tweet."""

    # Identity and content
    ID = Field()             # tweet id
    url = Field()            # tweet url
    datetime = Field()       # post time
    text = Field()           # text content
    user_id = Field()        # user id
    usernameTweet = Field()  # username of tweet

    # Counters
    nbr_retweet = Field()    # number of retweets
    nbr_favorite = Field()   # number of favorites
    nbr_reply = Field()      # number of replies

    # Kind of tweet
    is_reply = Field()       # boolean: whether the tweet is a reply
    is_retweet = Field()     # boolean: whether it is just a retweet of another tweet

    # Attachments
    has_image = Field()      # True/False, whether the tweet contains images
    images = Field()         # list of image urls, empty if none
    has_video = Field()      # True/False, whether the tweet contains videos
    videos = Field()         # list of video urls
    has_media = Field()      # True/False, whether it contains media (e.g. summary)
    medias = Field()         # list of media
class UserRelationItem(scrapy.Item):
    """Item linking one user ``id`` to its ``follows`` and ``fans``."""

    # Plain class attribute (not a Field). Presumably the name of the storage
    # collection used by a pipeline -- confirm against the consumer.
    collection = 'users'

    id = Field()
    follows = Field()
    fans = Field()