Ejemplo n.º 1
0
 def __init__(self, type):
     """Read the storage switches from LianJiaConfig.cfg and set up the DB if enabled.

     :param type: stored unchanged on ``self.type``.

     The ``data/csv`` and ``data/db`` options are kept on ``self.is_csv`` and
     ``self.is_db``.  Only when the ``db`` option equals ``'ON'`` is a
     connection/cursor pair obtained from ``self.prepare()`` and
     ``self.dbinit()`` run.
     """
     settings = ConfigProvider('LianJiaConfig.cfg')
     self.config = settings
     self.is_csv = settings.get('data', 'csv')
     self.is_db = settings.get('data', 'db')
     self.type = type

     # Nothing more to do unless database output is switched on.
     if self.is_db != 'ON':
         return

     connection, cursor = self.prepare()
     self.con = connection
     self.cursor = cursor
     self.dbinit()
Ejemplo n.º 2
0
 def __init__(self, typeName, configName):
     """Remember the crawl type, load its config and pick the URL suffix.

     :param typeName: ``'sale'`` or ``'deal'``; any other value leaves
         ``self.link`` empty and logs a warning.
     :param configName: passed straight to ``ConfigProvider``.
     """
     self.type = typeName
     self.config = ConfigProvider(configName)

     known_links = (
         ('sale', '.lianjia.com/ershoufang/'),
         ('deal', '.lianjia.com/chengjiao/'),
     )
     for kind, suffix in known_links:
         if self.type == kind:
             self.link = suffix
             break
     else:
         # No known type matched: leave the link empty and warn.
         self.link = ''
         logging.warning('输入类型错误,需要为sale或者deal')
Ejemplo n.º 3
0
class ChatManager():
    """Registry of IRC and Skype channels and of the "listen" links between them.

    The dictionaries below are class attributes, so every instance reads and
    writes the same registry; ``__init__`` only loads the configuration while
    the id registry is still empty.
    """

    # channel id -> channel descriptor dict
    idsDic = {}
    # lookup key -> channel descriptor dict
    # (IRC channels are keyed by the bare channel name, Skype by '&' + name)
    aliasDic = {}
    config = {}
    # listening channel key -> list of channel keys it listens to
    listeners = {}

    def addIRCChannelToDictionary(self, channel):
        """Register the IRC channel named ``channel`` in both registries."""
        self.idsDic[channel] = {'type': 'irc', 'channel': channel, 'alias': '#' + channel}
        self.aliasDic[channel] = {'type': 'irc', 'channel': channel, 'alias': '#' + channel}

    def addSkypeChannelToDictionary(self, channel):
        """Register a Skype channel descriptor.

        ``channel`` is a dict that must contain ``'channel'`` and ``'name'``;
        it is tagged in place with ``'type'`` and ``'alias'`` and the same
        object is stored in both registries.
        """
        channel['type'] = "skype"
        channel['alias'] = "&" + channel['name']
        self.idsDic[channel['channel']] = channel
        self.aliasDic['&' + channel['name']] = channel

    def __init__(self):
        """Load the 'irc' and 'skype' channel lists unless already registered."""
        if self.idsDic:
            return
        self.configProvider = ConfigProvider()
        skypesection = self.configProvider.getConfigSection('skype')
        ircsection = self.configProvider.getConfigSection('irc')
        for channel in ircsection['channels']:
            self.addIRCChannelToDictionary(channel)
        for channel in skypesection['channels']:
            self.addSkypeChannelToDictionary(channel)

    def getChanelByAlias(self, alias):
        """Return the descriptor registered under ``alias``, or ``None``."""
        return self.aliasDic.get(alias, None)

    def getChanelById(self, Id):
        """Return the descriptor registered under ``Id``, or ``None``."""
        return self.idsDic.get(Id, None)

    def getAliases(self):
        """Return the alias-registry keys as a list (on Python 2 and 3 alike)."""
        return list(self.aliasDic)

    def getIds(self):
        """Return the id-registry keys as a list (on Python 2 and 3 alike)."""
        return list(self.idsDic)

    def isListening(self, channel1, channel2):
        """Return ``True`` when ``channel1`` currently listens to ``channel2``."""
        # Membership tests replace keys().count(), which only exists on the
        # list returned by Python 2's dict.keys().
        return channel2 in self.listeners.get(channel1, ())

    def turnListenOn(self, channel1, channel2):
        """Make ``channel1`` listen to ``channel2``.

        Returns ``None`` when either key is not a registered alias,
        ``"nonono"`` when a channel is asked to listen to itself,
        ``"already listening"`` when the link already exists, and otherwise
        the updated listeners dictionary.
        """
        if channel1 not in self.aliasDic or channel2 not in self.aliasDic:
            return None
        # Checked before the link lookup: this branch used to be nested under
        # a "channel1 != channel2" condition and could never run.
        if channel1 == channel2:
            return "nonono"
        if self.isListening(channel1, channel2):
            return "already listening"
        self.listeners.setdefault(channel1, []).append(channel2)
        return self.listeners

    def turnListenOff(self, channel1, channel2):
        """Remove the link ``channel1`` -> ``channel2``.

        Returns the listeners dictionary when a link was removed, ``None``
        when there was no such link.
        """
        if not self.isListening(channel1, channel2):
            return None
        self.listeners[channel1].remove(channel2)
        return self.listeners
Ejemplo n.º 4
0
class UrlsPro:
    """Build the LianJia request URLs for one crawl type from a config file."""

    def __init__(self, typeName, configName):
        """Store the crawl type, load the config and choose the URL suffix.

        :param typeName: ``'sale'`` or ``'deal'``; anything else leaves
            ``self.link`` empty and logs a warning.
        :param configName: passed to ``ConfigProvider``.
        """
        self.type = typeName
        self.config = ConfigProvider(configName)
        if self.type == 'sale':
            self.link = '.lianjia.com/ershoufang/'
        elif self.type == 'deal':
            self.link = '.lianjia.com/chengjiao/'
        else:
            self.link = ''
            logging.warning('输入类型错误,需要为sale或者deal')

    @staticmethod
    def _splitOption(raw):
        """Split a '/'-separated option value, dropping empty segments.

        ``None`` and ``''`` both yield an empty list, so callers can test the
        result for truthiness instead of comparing a list against ``''``.
        """
        if not raw:
            return []
        return [part.strip() for part in raw.split('/') if part.strip()]

    # Build the first request URLs; they are used to find out the page count.
    def getFirstUrls(self):
        """Return one URL per configured area, or ``[]`` if anything is missing."""
        if not self.link:
            return []
        city = self.config.get(self.type, 'city')
        areas = self._splitOption(self.config.get(self.type, 'area'))
        if not city or not areas:
            return []
        return ['https://' + city + self.link + area + '/' for area in areas]

    # When multi-city crawling is enabled, build one request URL per city.
    def getMultiCityUrls(self):
        """Return one URL per ``multi_city`` entry when ``enable_multi`` is on."""
        if not self.link or not self.isMulti():
            return []
        cities = self._splitOption(self.config.get(self.type, 'multi_city'))
        return ['https://' + city + self.link for city in cities]

    def getMinPage(self):
        """Return the ``min_page`` option as an int; 0 when unset or empty."""
        page = self.config.get(self.type, 'min_page')
        if not page:
            return 0
        return int(page)

    def isMulti(self):
        """Return ``True`` only when ``enable_multi`` is the string ``'True'``."""
        return self.config.get(self.type, 'enable_multi') == 'True'

    def getAllUrls(self):
        """Return the area URLs followed by the multi-city URLs."""
        urls = self.getFirstUrls()
        urls.extend(self.getMultiCityUrls())
        return urls
 def __init__(self):
   """Adopt the ``__state`` dictionary when it is non-empty, else build providers.

   When ``self.__state`` is truthy it becomes this instance's ``__dict__``.
   Otherwise a ``ConfigProvider``, a ``GatesProvider`` and a ``ChatManager``
   are created and stored on the instance.
   """
   shared = self.__state
   if shared:
     self.__dict__ = shared
     return
   self.configProvider = ConfigProvider()
   self.gatesProvider = GatesProvider()
   self.chatMgr = ChatManager()
Ejemplo n.º 6
0
 def __init__(self):
   """Register the configured IRC and Skype channels on first use.

   Nothing happens when ``self.idsDic`` already holds entries.  Otherwise
   the 'skype' and 'irc' config sections are read and every entry of
   their ``'channels'`` lists is registered, IRC channels first.
   """
   if self.idsDic != {}:
     return
   self.configProvider = ConfigProvider()
   provider = self.configProvider
   skype_cfg = provider.getConfigSection('skype')
   irc_cfg = provider.getConfigSection('irc')
   for irc_channel in irc_cfg['channels']:
     self.addIRCChannelToDictionary(irc_channel)
   for skype_channel in skype_cfg['channels']:
     self.addSkypeChannelToDictionary(skype_channel)
class InterChatDispatcher():
    """Route messages between the IRC thread and the Skype thread.

    Instances share one attribute dictionary (``__state``): the first
    instance creates the providers, later instances see the same objects.
    """

    __state = {}

    def __init__(self):
        """Bind this instance to the shared state, creating it on first use."""
        # The shared dict must become __dict__ *before* attributes are set;
        # previously nothing was ever stored in __state, so the state was
        # never actually shared between instances.
        self.__dict__ = self.__state
        if not self.__state:
            self.configProvider = ConfigProvider()
            self.gatesProvider = GatesProvider()
            self.chatMgr = ChatManager()

    def launch(self):
        """Start the IRC thread (from the 'irc' config section) and the Skype thread."""
        irc_config = self.configProvider.getConfigSection('irc')
        self.irc_thread = IRCThread(irc_config['host'], irc_config['port'],
                                    irc_config['nick'], irc_config['channels'], self)
        self.connexion = self.irc_thread.getConnexion()
        self.irc_thread.start()
        self.skype_thread = InterChatSkypeThread(self)
        self.skype_thread.start()

    def dispatchIRCCommand(self, nick, message, channel):
        """Print the incoming IRC message and return ``"oui"``."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Message: " + message + " From " + channel)
        return "oui"

    def dispatchMessageToChannel(self, channel, message):
        """Send ``message`` to the channel registered under alias ``channel``.

        Unknown aliases are ignored instead of raising ``TypeError``.
        """
        channelTo = self.chatMgr.getChanelByAlias(channel)
        if channelTo is None:
            return
        if channelTo['type'] == 'irc':
            self.irc_thread.writeToChannel(message, channelTo['channel'])
        if channelTo['type'] == 'skype':
            self.skype_thread.writeToChannel(message, channelTo['channel'])

    def messageToChannel(self, channel, message, nick=""):
        """Forward a ``!tele <alias> <text>`` message to the target alias.

        The second whitespace-separated word of ``message`` is the target
        alias; nothing is sent when it is missing or unknown.
        """
        parts = message.split(" ")
        if len(parts) < 2:
            return
        channelTo = self.chatMgr.getChanelByAlias(parts[1])
        if not channelTo:
            return
        body = message.replace("!tele " + parts[1], "")
        rmessage = "tele from {0}@{1}: {2}".format(nick, channel, body)
        if channelTo['type'] == 'irc':
            self.irc_thread.writeToChannel(rmessage, channelTo['channel'])
        elif channelTo['type'] == 'skype':
            self.skype_thread.writeToChannel(rmessage, channelTo['channel'])

    def helpToChannel(self, channel, message):
        """Send config/help.txt line by line to ``channel`` (``message`` is unused)."""
        with open('config/help.txt', 'r') as f:
            for line in f:
                self.dispatchMessageToChannel(channel, line)

    def getChannelList(self):
        """Return the aliases known to the chat manager."""
        return self.chatMgr.getAliases()

    def dispatchMessage(self, chatname, sendername, body):
        """Relay a message from chat ``chatname`` to the IRC channels listening to it.

        Messages from the bot accounts and from unknown chats are dropped.
        Always returns ``False``.
        """
        channel = self.chatMgr.getChanelById(chatname)
        if sendername in ['pdobot', 'ictelecom']:
            return False
        if channel is None:
            # Unknown source chat: there is no alias to match listeners against.
            return False
        for channelTo in self.chatMgr.getAliases():
            channelToFull = self.chatMgr.getChanelByAlias(channelTo)
            if self.chatMgr.isListening(channelTo, channel['alias']):
                print("BINGO")
                if channelToFull['type'] == 'irc':
                    message = "{0}{1}: {2}".format(sendername, channel['alias'], body)
                    self.irc_thread.writeToChannel(message, channelTo)
        return False

    def listenChatOn(self, channel1, channel2):
        """Delegate to ``ChatManager.turnListenOn``."""
        return self.chatMgr.turnListenOn(channel1, channel2)

    def listenChatOff(self, channel1, channel2):
        """Delegate to ``ChatManager.turnListenOff``."""
        return self.chatMgr.turnListenOff(channel1, channel2)
Ejemplo n.º 8
0
class DataTo:
    """Persist crawled LianJia listings ('sale' or 'deal') to MySQL and name CSV columns."""

    def __init__(self, type):
        """Read the output switches and, when the DB switch is 'ON', prepare the database.

        :param type: ``'sale'`` or ``'deal'``; selects the table used by
            ``insert`` and the header returned by ``getName``.
        """
        self.config = ConfigProvider('LianJiaConfig.cfg')
        self.is_csv = self.config.get('data', 'csv')
        self.is_db = self.config.get('data', 'db')
        self.type = type
        if self.is_db == 'ON':
            self.con, self.cursor = self.prepare()
            self.dbinit()

    # Open the database connection and return it together with a cursor.
    def prepare(self):
        """Connect using the ``DB`` config section; return ``(connection, cursor)``."""
        con = pymysql.connect(host=self.config.get('DB', 'host'),
                              user=self.config.get('DB', 'user'),
                              passwd=self.config.get('DB', 'password'),
                              port=int(self.config.get('DB', 'port')),
                              charset='utf8')
        cursor = con.cursor()
        return con, cursor

    def dbinit(self):
        """Create the HOUSECRAWL database and the ``sale``/``deal`` tables if missing.

        Failures are logged and rolled back rather than raised.
        """
        import logging

        createSale = """CREATE TABLE IF NOT EXISTS `sale`(
            `house_id` VARCHAR(20),
            `city` VARCHAR(20),
            `district` VARCHAR(30),
            `title` VARCHAR(200) NOT NULL,
            `area` VARCHAR(100) NOT NULL,
            `description` VARCHAR(200) NOT NULL,
            `attention` VARCHAR(50) NOT NULL,
            `putdate` VARCHAR(50) NOT NULL,
            `totalprice` FLOAT,
            `unitprice` FLOAT,
            PRIMARY KEY (`house_id`))ENGINE=InnoDB DEFAULT CHARSET=utf8"""
        createDeal = """CREATE TABLE IF NOT EXISTS `deal`(
            `house_id` VARCHAR(20),
            `city` VARCHAR(20),
            `district` VARCHAR(30),
            `title` VARCHAR(100) NOT NULL,
            `description` VARCHAR(150) NOT NULL,
            `saletime` VARCHAR(20) NOT NULL,
            `dealdate` VARCHAR(20) NOT NULL,
            `saleprice` FLOAT,
            `dealprice` FLOAT,
            `unitprice` FLOAT,
            PRIMARY KEY (`house_id`))ENGINE=InnoDB DEFAULT CHARSET=utf8"""
        try:
            # Create the database and both tables.
            self.cursor.execute(
                'CREATE DATABASE IF NOT EXISTS HOUSECRAWL DEFAULT CHARSET utf8 COLLATE utf8_general_ci'
            )
            self.cursor.execute('USE HOUSECRAWL')
            self.cursor.execute(createSale)
            self.cursor.execute(createDeal)
            self.con.commit()
        except Exception:
            # Still best-effort, but no longer silent.
            logging.exception('DataTo.dbinit failed; rolling back')
            self.con.rollback()

    def insert(self, items):
        """Upsert ``items`` into the table matching ``self.type``.

        :param items: sequence of 10-value rows in the column order of the
            statement for the current type.

        Any other ``self.type`` executes nothing and just commits.  Failures
        are logged and rolled back rather than raised.
        """
        import logging

        # Upsert for houses on sale; ON DUPLICATE KEY UPDATE de-duplicates by
        # house_id.  Every placeholder must be %s.
        insertSale = """insert into sale(house_id,city,district,title,area,description,attention,putdate,totalprice,unitprice)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        on duplicate key update
        city=VALUES(city),district=VALUES(district),title=VALUES(title),area=VALUES(area),description=VALUES(description),
        attention=VALUES(attention),putdate=VALUES(putdate),totalprice=VALUES(totalprice),unitprice=VALUES(unitprice)"""
        # Upsert for completed deals.
        insertDeal = """insert into deal(house_id,city,district,title,description,saletime,dealdate,saleprice,dealprice,unitprice)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        on duplicate key update
        city=VALUES(city),district=VALUES(district),title=VALUES(title),description=VALUES(description),
        saletime=VALUES(saletime),dealdate=VALUES(dealdate),saleprice=VALUES(saleprice),dealprice=VALUES(dealprice),
        unitprice=VALUES(unitprice)"""

        if self.type == 'sale':
            statement = insertSale
        elif self.type == 'deal':
            statement = insertDeal
        else:
            statement = None

        try:
            if statement is not None:
                self.cursor.executemany(statement, items)
            self.con.commit()
        except Exception:
            logging.exception('DataTo.insert failed for type %r; rolling back', self.type)
            self.con.rollback()

    # Return the CSV column names.
    def getName(self):
        """Return the CSV header for ``self.type``; ``None`` for an unknown type."""
        if self.type == 'sale':
            return ["标题", "区域", "描述", "关注信息", "总价(万)", "单价(元/平方米)"]
        if self.type == 'deal':
            return [
                "标题", "描述", "成交周期", "成交日期", "挂牌价(万)", "成交价(万)", "单价(元/平方米)"
            ]
        return None