コード例 #1
0
    def __init__(self, *args, **kwargs):
        """Build the screen: a scrollable list on top, a row of two buttons below.

        ``*args`` and ``**kwargs`` are accepted but not forwarded; the parent
        initialiser is always called with ``orientation="vertical"``.
        """
        super(Animals, self).__init__(orientation="vertical")

        # Publish the running application through the module-level ``app`` name.
        global app
        app = App.get_running_app()

        list_container = ScrollView()
        self.list = MDList()
        self.database = Database(dbtype='sqlite', dbname='animals.db')
        # Called only once both the list widget and the database exist.
        self.rewrite_list()
        list_container.add_widget(self.list)
        self.add_widget(list_container)

        # Bottom row: one flat button per "create" action, in display order.
        actions_row = BoxLayout(orientation='horizontal', size_hint_y=0.1)
        button_specs = (
            ("Add new animal", self.on_create_animal),
            ("Add new animal type", self.on_create_type),
        )
        for caption, handler in button_specs:
            action_button = MDFlatButton()
            action_button.text = caption
            action_button.font_style = "Button"
            action_button.on_release = handler
            actions_row.add_widget(action_button)
        self.add_widget(actions_row)
コード例 #2
0
 def delete_user_alert(email, currency, rate_exchange):
     """Call ``Database.remove`` on the "all_alert" collection with a query
     matching the given email, currency and rate_exchange."""
     alert_filter = {
         "email": email,
         "currency": currency,
         "rate_exchange": rate_exchange,
     }
     Database.remove(collection="all_alert", query=alert_filter)
コード例 #3
0
    def set_bingo_board(self, bingo_board):
        """
        Store ``bingo_board`` as this restaurant user's future board.

        ``bingo_board`` is modified in place: an "expiry_date" that is not
        already a ``datetime`` is parsed from "month/day/year" text into a
        ``datetime`` at 23:59:59 of that day, and "board" / "board_reward"
        are replaced by the result of ``Database.replace_object_id``.

        When the user's existing future board has an empty name, a deep copy
        of the board is also stored as the current "bingo_board" and the
        future board's expiry date is moved 90 days later.

        ``UpdateFailureException`` and ``KeyError`` are caught and reported
        with a printed message.
        """
        try:
            expiry = bingo_board["expiry_date"]
            if not isinstance(expiry, datetime):
                # text is month/day/year -> end of that calendar day
                parts = list(map(int, expiry.split("/")))
                bingo_board["expiry_date"] = datetime(parts[2], parts[0],
                                                      parts[1], 23, 59, 59)

            # convert ids to object ids
            for key in ("board", "board_reward"):
                bingo_board[key] = Database.replace_object_id(bingo_board[key])

            changes = {"future_board": bingo_board}
            is_new_user = self.get_future_board()["name"] == ""
            if is_new_user:
                # Snapshot first, so only the future board gets the extra 90 days.
                changes["bingo_board"] = copy.deepcopy(bingo_board)
                bingo_board["expiry_date"] += timedelta(days=90)

            self.rpm.db.update('restaurant_users',
                               {"username": self.rpm.get_id()},
                               {'$set': changes})
        except (UpdateFailureException, KeyError):
            print("There was an issue updating a bingo board.")
コード例 #4
0
ファイル: appF.py プロジェクト: dar1enyang/Currency-Reminder
def initialize():
    """Initialise the database, write the session's "email" and "name" back
    under their own keys, and start a background scheduler that runs
    ``check_alert`` on a cron trigger (day_of_week 0-4, 16:30)."""
    Database.initialize()

    # Re-store each value so the key is present in the session afterwards.
    for field in ('email', 'name'):
        session[field] = session.get(field)

    cron_settings = {"day_of_week": "0-4", "hour": "16", "minute": 30}
    alert_scheduler = BackgroundScheduler()
    alert_scheduler.add_job(check_alert, "cron", **cron_settings)
    alert_scheduler.start()
コード例 #5
0
def initialize():
    """Initialise the database, write the session's "email" and "name" back
    under their own keys, and start a background scheduler.

    No job is registered on the scheduler in this version.
    """
    Database.initialize()

    current_email = session.get('email')
    session['email'] = current_email
    current_name = session.get('name')
    session['name'] = current_name

    # Job scheduler setup; both check_alert registrations are disabled.
    scheduler = BackgroundScheduler()
    # scheduler.add_job(check_alert, "interval", seconds=10)
    # scheduler.add_job(check_alert, "cron", day_of_week="0-4", hour="17", minute=30)
    scheduler.start()
コード例 #6
0
 def update_user_alert(email, currency, rate_exchange, price):
     """Call ``Database.update`` on the "all_alert" collection: the query
     matches email, currency and rate_exchange, and the data sets "price"."""
     alert_filter = {
         "email": email,
         "currency": currency,
         "rate_exchange": rate_exchange,
     }
     new_values = {"$set": {"price": price}}
     Database.update(collection="all_alert",
                     query=alert_filter,
                     data=new_values)
コード例 #7
0
ファイル: app.py プロジェクト: whitefly/money_rate
def inti_request():
    """Initialise the database, write the session's 'email' and 'name' back
    under their own keys, and start a background scheduler that runs
    ``Deliver.send_simple_message`` on a cron trigger."""
    Database.inti()

    # Make sure session['email'] / session['name'] are defined; otherwise a
    # later lookup fails with an error (this also seemed to happen right
    # after a browser restart).
    for key in ('email', 'name'):
        session[key] = session.get(key)

    # Monday to Friday, all day long, refresh once a minute.
    every_minute = dict(day_of_week='0-4', hour="0-23", minute="0-59")
    clock = BackgroundScheduler()
    clock.add_job(Deliver.send_simple_message, 'cron', **every_minute)
    clock.start()
コード例 #8
0
 def test_getFinanciers(self):
     """``getFinanciers(rate=70, term=72, warranty="Terreno")`` returns only
     "Financiador Teste 1" out of three inserted financiers.

     The fixture rows are deleted in a ``finally`` block, so they are removed
     even when an insert or the query raises and do not leak into later runs.
     """
     db = Database()
     cnpjs = ("31.213.941/0001-37",
              "24.642.112/0001-04",
              "19.576.165/0001-34")
     try:
         db.insertFinancier(
             Financier(cnpj=cnpjs[0],
                       name="Financiador Teste 1",
                       rate=70,
                       term=72,
                       warranty="Terreno"))
         db.insertFinancier(
             Financier(cnpj=cnpjs[1],
                       name="Financiador Teste 2",
                       rate=70,
                       term=48,
                       warranty="Imóvel"))
         db.insertFinancier(
             Financier(cnpj=cnpjs[2],
                       name="Financiador Teste 3",
                       rate=35,
                       term=36,
                       warranty="Imóvel"))
         financiers = db.getFinanciers(rate=70, term=72, warranty="Terreno")
     finally:
         # Clean up before asserting so a failed assertion leaves no rows behind.
         for cnpj in cnpjs:
             db.database.financier.delete_one({'cnpj': cnpj})

     self.assertEqual(len(financiers), 1)
     self.assertEqual(financiers[0].name, "Financiador Teste 1")
     self.assertEqual(financiers[0].rate, 70)
     self.assertEqual(financiers[0].term, 72)
     self.assertEqual(financiers[0].warranty, "Terreno")
コード例 #9
0
 def test_getCompany(self):
     """``getCompany(cnpj=...)`` returns "Companhia Teste 1" out of three
     inserted companies.

     The fixture rows are deleted in a ``finally`` block, so they are removed
     even when an insert or the lookup raises and do not leak into later runs.
     """
     db = Database()
     cnpjs = ("93.612.749/0001-70",
              "27.456.797/0001-92",
              "26.805.322/0001-00")
     try:
         db.insertCompany(
             Company(cnpj=cnpjs[0],
                     name="Companhia Teste 1",
                     rate=25,
                     term=72,
                     warranty="Terreno"))
         db.insertCompany(
             Company(cnpj=cnpjs[1],
                     name="Companhia Teste 2",
                     rate=25,
                     term=48,
                     warranty="Imóvel"))
         db.insertCompany(
             Company(cnpj=cnpjs[2],
                     name="Companhia Teste 3",
                     rate=35,
                     term=36,
                     warranty="Imóvel"))
         company = db.getCompany(cnpj=cnpjs[0])
     finally:
         # Clean up before asserting so a failed assertion leaves no rows behind.
         for cnpj in cnpjs:
             db.database.company.delete_one({'cnpj': cnpj})

     self.assertEqual(company.name, "Companhia Teste 1")
     self.assertEqual(company.rate, 25)
     self.assertEqual(company.term, 72)
     self.assertEqual(company.warranty, "Terreno")
コード例 #10
0
ファイル: main.py プロジェクト: Dr4kk0nnys/Financial-System
    def __init__(self):
        """Create the two ``Database`` objects and run the interactive prompt.

        Lines are read from a '> ' prompt and handed to ``handle_input`` until
        the user enters 'exit' (in any letter case); the constructor returns
        only then.
        """
        self.main_database = Database('profit_and_debt.txt')
        self.secondary_database = Database('product_sales_info.txt')

        command = input('> ')
        while command.lower() != 'exit':
            self.handle_input(command)
            command = input('> ')
コード例 #11
0
ファイル: appF.py プロジェクト: dar1enyang/Currency-Reminder
def initialize():
    """Initialise the database, write the session's "email" and "name" back
    under their own keys, and start a background scheduler that runs
    ``check_alert`` on a cron trigger (day_of_week 0-4, 16:30)."""
    Database.initialize()

    stored_email = session.get('email')
    session['email'] = stored_email
    stored_name = session.get('name')
    session['name'] = stored_name

    job_runner = BackgroundScheduler()
    job_runner.add_job(
        check_alert, "cron", day_of_week="0-4", hour="16", minute=30)
    job_runner.start()
コード例 #12
0
ファイル: app.py プロジェクト: carlos-menezes/TranslateBot
 def __init__(self):
     """Record the start time, create the database and Reddit client, open
     the comment and inbox streams, then call ``main()``."""
     # Seconds since the Epoch at start-up; used later so the bot does not
     # reply to old comments every time it is executed.
     self.executed_timestamp = time.time()
     self.database = Database()

     # Authenticate with Reddit.
     self.reddit = praw.Reddit('translate_bot')

     # Stream of comments from the 'all' subreddit.
     all_subreddit = self.reddit.subreddit('all')
     self.comment_stream = all_subreddit.stream.comments(pause_after=-1)

     # Stream of unread inbox messages.
     unread_source = self.reddit.inbox.unread
     self.inbox_stream = praw.models.util.stream_generator(
         unread_source, pause_after=-1)

     # Start
     self.main()
コード例 #13
0
 def create_alert(email, currency, rate_exchange, price):
     """Save a new alert unless one with the same email, currency and
     rate_exchange is already found in "all_alert".

     Returns True when a new alert was saved, False when one was found.
     """
     lookup = {"email": email, "currency": currency, "rate_exchange": rate_exchange}
     existing = Database.find_one(collection="all_alert", query=lookup)
     if existing is None:
         All_alert(email, currency, rate_exchange, price).save_to_db()
         return True
     return False
コード例 #14
0
ファイル: mqtt_manager.py プロジェクト: alexp25/mqtt-server
    def load_sensors(self):
        """Load sensors and topics from the database into ``self.sensors``
        and ``self.topics``.

        Each sensor row is copied into a new ``Sensor``; its ``ts`` and
        ``log_ts`` are both set to one timestamp taken before the loop. Each
        topic row is wrapped in an ``MQTTTopic``. A ``None`` result from
        either query is treated as "nothing to load".

        Errors are logged instead of raised. Only ``Exception`` is caught, so
        KeyboardInterrupt and SystemExit still propagate (the previous bare
        ``except:`` swallowed them too).
        """
        self.logg.log("load sensors")
        try:
            self.db = Database.instance()
            sensors = self.db.get_sensors()
            self.logg.log(sensors)
            t_create = time.time()
            if sensors is not None:
                for s in sensors:
                    s1: Sensor = Sensor()
                    s1.id = s["sensor_id"]
                    s1.log_rate = s["log_rate"]
                    s1.topic_name = s["topic_name"]
                    s1.topic_code = s["topic_code"]
                    s1.type = s["sensor_type_code"]
                    # Same creation time for every sensor loaded in this pass.
                    s1.ts = t_create
                    s1.log_ts = t_create
                    self.sensors.append(s1)

            topics = self.db.get_topics()
            if topics is not None:
                for t in topics:
                    t1: MQTTTopic = MQTTTopic(t)
                    self.topics.append(t1)

            self.logg.log(self.topics)
            self.logg.log(self.sensors)
        except Exception:
            self.logg.log(Utils.format_exception(self.__class__.__name__))
コード例 #15
0
ファイル: main.py プロジェクト: umich-dbgroup/litmus
def start_thread(mode, db_name, qid, task, info, tq_rank, log_dir=None):
    """Run one task through ``run_task`` and return its result.

    Settings are read from 'config.ini'. ``task`` is JSON text with a 'cqs'
    mapping and an 'ans' list; the 'cqs' keys and the 'ans' entries are
    converted to ``int`` before the task is run. An ``AIG`` is built only for
    modes 'greedybb' / 'greedyfirst' with ``info == 'range'``. When
    ``log_dir`` is truthy the run happens inside ``Logger(<log_dir>/<qid>.log)``.
    """
    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.read('config.ini')

    db = Database(
        config.get('database', 'user'),
        config.get('database', 'pw'),
        config.get('database', 'host'),
        db_name,
        config.get('database', 'cache_dir'),
        timeout=config.get('database', 'timeout'),
        buffer_pool_size=config.get('database', 'buffer_pool_size'),
    )
    parser = SQLParser(db_name, config.get('parser', 'cache_dir'))

    # only load aig if info includes range
    aig = None
    if mode in ('greedybb', 'greedyfirst') and info == 'range':
        aig_path = os.path.join(config.get('aig', 'dir'), db_name + '.aig')
        aig = AIG(db, aig_path)

    # JSON object keys are strings; the rest of the pipeline gets int ids.
    raw_task = json.loads(task)
    task = {
        'cqs': {int(cqid): cq for cqid, cq in raw_task['cqs'].items()},
        'ans': [int(cqid) for cqid in raw_task['ans']],
    }

    if not log_dir:
        return run_task(mode, db, parser, qid, task, info, aig, tq_rank)

    log_path = os.path.join(log_dir, str(qid) + '.log')
    with Logger(log_path):
        return run_task(mode, db, parser, qid, task, info, aig, tq_rank)
コード例 #16
0
ファイル: user.py プロジェクト: dar1enyang/Currency-Reminder
 def check_user(email, password):
     """Return True only when a "users" record exists for ``email`` and
     ``User.check_hash_password`` accepts ``password`` against its stored
     "password" value; otherwise return False.

     The verifier's result is judged by truthiness. The previous
     ``is False`` comparison let a login through whenever the verifier
     returned any other falsy value such as ``None`` or ``0``.
     """
     user_data = Database.find_one(collection="users", query={"email": email})
     if user_data is None:
         return False
     return bool(User.check_hash_password(password, user_data["password"]))
コード例 #17
0
ファイル: user.py プロジェクト: joyc/currency_remind
 def register_user(name, email, password):
     """Save a new user unless a "users" record with this email is found.

     The password is passed through ``User.hash_password`` before saving.
     Returns True when a user was saved, False when the email was found.
     """
     existing = Database.find_one(collection="users", query={"email": email})
     if existing is None:
         hashed = User.hash_password(password)
         User(name, email, hashed).save_to_db()
         return True
     return False
コード例 #18
0
ファイル: setup.py プロジェクト: ZikeYan/RoutedFusion
def get_database(dataset, config, mode='train'):
    """Build a ``Database`` for ``dataset`` from a copy of ``config.DATA``.

    The copy gets a ``transform.ToTensor()`` transform and a ``scene_list``
    taken from ``config.DATA.<mode>_scene_list`` (e.g. ``train_scene_list``
    for the default mode). A missing attribute raises ``AttributeError``.
    """
    # TODO: make this better
    database_config = copy(config.DATA)
    database_config.transform = transform.ToTensor()
    # getattr performs the same attribute lookup the old eval() did, without
    # executing a string assembled from the caller-supplied ``mode``.
    database_config.scene_list = getattr(config.DATA,
                                         '{}_scene_list'.format(mode))

    return Database(dataset, database_config)
コード例 #19
0
 def __init__(self, username, collection):
     """
     Create a profile for ``username`` that uses ``collection`` as its
     database collection.

     The id is the lower-cased username; the full name and hashed password
     start out as empty strings.
     """
     self.db = Database.get_instance()
     self.id = username.lower()
     self.fullname = self.hashed_pw = ""
     self.database_collection = collection
コード例 #20
0
 def connect(self):
     """Read the external-API settings from ``Constants.conf["ENV"]``, fetch
     the sensors from the database and, unless the result is ``None``, pass
     them to ``check_create_sensors``."""
     env_conf = Constants.conf["ENV"]
     self.ext_apis = env_conf["EXT_API"]
     self.logstart = env_conf["EXT_API_LOG_INIT"]
     print(self.ext_apis)

     self.db = Database.instance()
     sensors: List[Sensor] = self.db.get_sensors()
     print(sensors)
     if sensors is None:
         return
     self.check_create_sensors(sensors)
コード例 #21
0
 def find_user_alert(email, rate_kind) -> dict:
     """
     Purpose: return all currencies the user is following.

     Delegates to ``Database.find`` on ``Alert.collection_name`` with a query
     on email and rate_kind.
     """
     alert_filter = {"email": email, "rate_kind": rate_kind}
     return Database.find(Alert.collection_name, query=alert_filter)
コード例 #22
0
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler and product lists.

     When the parsed arguments have a truthy 'database' entry the products
     are selected from the database; otherwise ``get_lists_from_excel`` is
     called instead.
     """
     super(ChomeSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5)

     if not self.d['database']:
         self.get_lists_from_excel()
     else:
         self.database = Database()
         self.database.connect()
         catalog_id, product_id = self.d['catalog_id'], self.d['product_id']
         self.products, self.no_urls = self.database.select_products(
             catalog_id, product_id)
         self.database.disconnect()

     self.add_properties(self.xml)
     self.images_store = "/" + settings['IMAGES_STORE']
     self.total = len(self.no_urls['product_ids'])
コード例 #23
0
ファイル: user.py プロジェクト: whitefly/money_rate
 def update_user_email(old_email, new_email):
     """
     Used to change a user's email address.

     Delegates to ``Database.update_one`` on ``User.collection_name``: the
     query matches ``old_email`` and the data sets "email" to ``new_email``.
     """
     match_old = {"email": old_email}
     set_new = {"$set": {"email": new_email}}
     return Database.update_one(User.collection_name,
                                query=match_old,
                                data=set_new)
コード例 #24
0
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler, product lists and start URLs.

     When the parsed arguments have a truthy 'database' entry the products
     are selected from the database; otherwise ``get_lists_from_excel`` is
     called instead.
     """
     super(GuitarCenterSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     arg_source = DatabaseTerminal(sys.argv, self.name)
     self.d = arg_source.get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5)

     if self.d['database']:
         self.database = Database()
         self.database.connect()
         selection = self.database.select_products(self.d['catalog_id'],
                                                   self.d['product_id'])
         self.products, self.no_urls = selection
         self.database.disconnect()
     else:
         self.get_lists_from_excel()

     self.add_properties(self.xml)
     self.handle_not_provided()
     product_urls = self.products['urls']
     self.start_urls = product_urls
     self.total = len(product_urls)
コード例 #25
0
 def delete_user_alert(email, current, rate_kind):
     """
     Purpose: delete one currency monitor.

     Delegates to ``Database.delete`` on ``Alert.collection_name`` with a
     query on email, current and rate_kind.
     """
     alert_filter = {
         "email": email,
         "current": current,
         "rate_kind": rate_kind,
     }
     return Database.delete(Alert.collection_name, query=alert_filter)
コード例 #26
0
 def create_alert(email, currency, rate_exchange, price):
     """Save a new alert unless "all_alert" already yields one for the same
     email, currency and rate_exchange.

     Returns False when an alert was found, True after saving a new one.
     """
     duplicate_filter = {
         "email": email,
         "currency": currency,
         "rate_exchange": rate_exchange,
     }
     found = Database.find_one(collection="all_alert", query=duplicate_filter)
     already_exists = found is not None
     if already_exists:
         return False

     new_alert = All_alert(email, currency, rate_exchange, price)
     new_alert.save_to_db()
     return True
コード例 #27
0
def main():
    """Compute "TQ Confusion" values for the tasks of one database.

    Command line: a positional ``db`` name and an optional integer ``--qid``.
    With a truthy ``--qid`` only that task is computed. Otherwise every task
    is processed (excluded ids are skipped, cached values are reused, new
    values are saved to the cache as they are computed) and a summary of how
    many values are <= 0.75 versus > 0.75 is printed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('db')
    arg_parser.add_argument('--qid', type=int)
    args = arg_parser.parse_args()

    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.read('config.ini')

    db = Database(
        config.get('database', 'user'),
        config.get('database', 'pw'),
        config.get('database', 'host'),
        args.db,
        config.get('database', 'cache_dir'),
        timeout=config.get('database', 'timeout'),
        buffer_pool_size=config.get('database', 'buffer_pool_size'),
    )
    sql_parser = SQLParser(args.db, config.get('parser', 'cache_dir'))

    tasks = load_tasks(config.get('main', 'data_dir'), args.db)
    tqcs = load_tqc_cache(config, args.db)

    # load qids to exclude
    excludes = find_excludes(args.db)

    if args.qid:
        tqcs[args.qid] = tqc_for_task(db, sql_parser, args.qid, tasks[args.qid])
        return

    for qid, task in tasks.items():
        print('QUERY {}'.format(qid))
        if qid in excludes:
            print('Skipping non-SPJ query.')
            print()
            continue

        if qid in tqcs:
            print('Loaded from cache.')
        else:
            tqcs[qid] = tqc_for_task(db, sql_parser, qid, task)
            # Persist after every new value.
            save_tqc_cache(config, args.db, tqcs)
        print('TQ Confusion: {}'.format(tqcs[qid]))
        print()

    # Summary over everything in the cache that is not excluded.
    counted = [tqc for qid, tqc in tqcs.items() if qid not in excludes]
    easy = sum(1 for tqc in counted if tqc <= 0.75)
    hard = len(counted) - easy
    print('TQC <= 0.75: {}'.format(easy))
    print('TQC > 0.75: {}'.format(hard))
コード例 #28
0
class TestStorage(unittest.TestCase):
    """Round-trip tests: objects written with ``storage.write`` are read back
    with ``storage.read`` and compared field by field."""

    def setUp(self):
        """Create fresh objects and empty "test.service" / "test.db" files in
        the current working directory."""
        self.service = Service()
        self.database = Database()
        for file_name in ("test.service", "test.db"):
            open(file_name, "w+").close()

    def test_write_read_service(self):
        """A Service keeps its name, username and password through a
        write/read cycle."""
        self.service.service_name = "Hello"
        self.service.username = "******"
        self.service.password = "******"

        storage.write("test", self.service, "test.service")
        service2 = Service()
        storage.read("test", service2, "test.service")
        self.assertEqual(service2.service_name, self.service.service_name)
        self.assertEqual(service2.username, self.service.username)
        self.assertEqual(service2.password, self.service.password)

    def test_write_read_database(self):
        """A Database keeps its name and its services through a write/read
        cycle."""
        self.database.add_service(Service())
        self.database.add_service(Service())
        self.database.name = "Hey"

        storage.write("test", self.database, "test.db")
        database2 = Database()
        storage.read("test", database2, "test.db")
        self.assertEqual(database2.name, self.database.name)
        # Check the count first: comparing element by element alone would not
        # notice extra services in the object that was read back.
        self.assertEqual(len(database2.services), len(self.database.services))
        for loaded, original in zip(database2.services, self.database.services):
            self.assertEqual(loaded.service_name, original.service_name)
            self.assertEqual(loaded.username, original.username)
            self.assertEqual(loaded.password, original.password)

    def tearDown(self):
        """Delete the two files created by ``setUp``."""
        for file_name in ("test.service", "test.db"):
            os.remove(os.path.join(os.getcwd(), file_name))
コード例 #29
0
 def update_user_alert(email, current, rate_kind, price):
     """
     Purpose: change the price of a currency monitor.

     Delegates to ``Database.update_one`` on ``Alert.collection_name``: the
     query matches email, current and rate_kind, and the data sets "price".
     """
     alert_filter = {
         "email": email,
         "current": current,
         "rate_kind": rate_kind,
     }
     new_price = {"$set": {"price": price}}
     return Database.update_one(Alert.collection_name,
                                query=alert_filter,
                                data=new_price)
コード例 #30
0
 def get_restaurant_name_by_id(object_id):
     """
     Look up a restaurant user by database id and return the name stored in
     its profile.

     Returns "" (after printing a message) when the query fails, the id is
     invalid, no record comes back, or an expected key is missing.
     """
     try:
         id_filter = {"_id": ObjectId(object_id)}
         matches = Database.get_instance().query("restaurant_users", id_filter)
         return matches[0]["profile"]["name"]
     except (QueryFailureException, IndexError, KeyError, InvalidId):
         print("Something's wrong with the query.")
         return ""
コード例 #31
0
class Stats:
    """Statistics queries over the `bin` table."""

    def __init__(self):
        self.__db = Database()

    def get_bin_per_day(self, order, interval=30):
        """
        Getting a list of bin counts per day over the last ``interval`` days.

        :param order: 'DESC' returns the newest day first; any other value
            keeps the query's ascending date order
        :param interval: number of days to look back from the current date
        :return: List of dicts with "insertions" (count) and "day" (date)
        """
        cursor = self.__db.get_cursor()
        query = """
                        select
                            count(*) as count,
                            date(created) as date
                        from
                            `bin`
                        where
                            created >= date_sub(curdate(), interval %s day)
                        group by 
                            date
                        order by `date` ASC

                        """
        try:
            logger.info(f"Getting stats for the latest {interval} days with {order} order")
            logger.debug("Retrieving bin number per day")
            cursor.execute(query, (interval,))
            result = list(cursor.fetchall())
        finally:
            # Release the cursor even when the query raises.
            # NOTE(review): done() is assumed to be safe to call after a
            # failed query -- confirm against Database.
            cursor.close()
            self.__db.done()

        if not result:
            return []
        if order == 'DESC':
            logger.debug("Reversing the stats list")
            result = result[::-1]
        logger.debug(f"Found {len(result)} Bins in time range")
        return [{"insertions": row[0], "day": row[1]} for row in result]

    def get_last_bin_timestamp(self):
        """
        Getting the `created` value of the bin with the highest id.

        :return: the value converted by ``ISO8601.from_datetime_obj``, or
            None when the query returns no row
        """
        cursor = self.__db.get_cursor()
        query = """
                    SELECT `created` FROM `bin` ORDER BY id DESC LIMIT 1
                        """
        try:
            logger.debug("Getting last insertion datetime")
            cursor.execute(query)
            result = list(cursor.fetchall())
        finally:
            # Previously skipped when no row was found, which left the cursor
            # open on an empty table.
            cursor.close()
            self.__db.done()

        if not result:
            return None
        logger.debug("Found latest insertion date")
        return datetimeutil.ISO8601.from_datetime_obj(result[0][0])
コード例 #32
0
ファイル: burton_spider.py プロジェクト: marjevtic/testMarko
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler, product lists and start URLs.

     When the parsed arguments have a truthy 'database' entry the products
     are selected from the database; otherwise ``get_lists_from_excel`` is
     called instead.
     """
     super(BurtonSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5, "Burton")

     if not self.d['database']:
         self.get_lists_from_excel()
     else:
         self.database = Database()
         self.database.connect()
         catalog_id, product_id = self.d['catalog_id'], self.d['product_id']
         self.products, self.no_urls = self.database.select_products(
             catalog_id, product_id)
         self.database.disconnect()

     self.handle_not_provided()
     burton.add_properties(self.xml)
     self.start_urls = self.products['urls']
     # NOTE(review): the product URLs assigned above are immediately replaced
     # by this single hard-coded URL -- confirm this is intended.
     self.start_urls = [
         "http://www.dickssportinggoods.com/product/index.jsp?productId=13243074"
     ]
     self.images_store = "/" + settings['IMAGES_STORE']
     self.total = len(self.start_urls)
コード例 #33
0
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler, product lists and start URLs.

     Products are selected from the database when the parsed arguments have
     a truthy "database" entry; otherwise ``get_lists_from_excel`` is called.
     """
     super(GuitarCenterSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5)

     if not self.d["database"]:
         self.get_lists_from_excel()
     else:
         self.database = Database()
         self.database.connect()
         self.products, self.no_urls = self.database.select_products(
             self.d["catalog_id"],
             self.d["product_id"],
         )
         self.database.disconnect()

     self.add_properties(self.xml)
     self.handle_not_provided()
     urls = self.products["urls"]
     self.start_urls = urls
     self.total = len(urls)
コード例 #34
0
ファイル: chome_spider.py プロジェクト: marjevtic/testMarko
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler and product lists.

     Products are selected from the database when the parsed arguments have
     a truthy 'database' entry; otherwise ``get_lists_from_excel`` is called.
     ``total`` is the number of entries in ``no_urls['product_ids']``.
     """
     super(ChomeSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     cli = DatabaseTerminal(sys.argv, self.name)
     self.d = cli.get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5)

     if self.d['database']:
         self.database = Database()
         self.database.connect()
         selection = self.database.select_products(self.d['catalog_id'],
                                                   self.d['product_id'])
         self.products, self.no_urls = selection
         self.database.disconnect()
     else:
         self.get_lists_from_excel()

     self.add_properties(self.xml)
     self.images_store = "/" + settings['IMAGES_STORE']
     ids_without_url = self.no_urls['product_ids']
     self.total = len(ids_without_url)
コード例 #35
0
    def __init__(self, *a, **kw):
        """Set up the spider: close-signal hook, command-line arguments, XML
        writer, error handler, product lists and start URLs.

        Products are selected from the database when the parsed arguments
        have a truthy "database" entry; otherwise ``get_lists_from_excel``
        is called.
        """
        super(SportmanSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
        self.xml = CommonXml()
        self.exc = ZmagsException(5, "Sportmann")

        if not self.d["database"]:
            self.get_lists_from_excel()
        else:
            self.database = Database()
            self.database.connect()
            self.products, self.no_urls = self.database.select_products(
                self.d["catalog_id"], self.d["product_id"])
            self.database.disconnect()

        self.add_properties(self.xml)
        self.start_urls = self.products["urls"]
        self.images_store = "/" + settings["IMAGES_STORE"]
        self.total = len(self.start_urls)
コード例 #36
0
ファイル: burton_spider.py プロジェクト: marjevtic/testMarko
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler, product lists and start URLs.

     Products are selected from the database when the parsed arguments have
     a truthy 'database' entry; otherwise ``get_lists_from_excel`` is called.
     """
     super(BurtonSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     cli = DatabaseTerminal(sys.argv, self.name)
     self.d = cli.get_arguments()
     self.xml = CommonXml()
     self.exc = ZmagsException(5, "Burton")

     if self.d['database']:
         self.database = Database()
         self.database.connect()
         selection = self.database.select_products(self.d['catalog_id'],
                                                   self.d['product_id'])
         self.products, self.no_urls = selection
         self.database.disconnect()
     else:
         self.get_lists_from_excel()

     self.handle_not_provided()
     burton.add_properties(self.xml)
     self.start_urls = self.products['urls']
     # NOTE(review): this hard-coded URL overrides the product URLs assigned
     # on the previous line -- confirm this is intended.
     single_url = "http://www.dickssportinggoods.com/product/index.jsp?productId=13243074"
     self.start_urls = [single_url]
     self.images_store = "/" + settings['IMAGES_STORE']
     self.total = len(self.start_urls)
コード例 #37
0
ファイル: lydias_spider.py プロジェクト: marjevtic/testMarko
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, XML
     writer, error handler, product lists and start URLs.

     Products are selected from the database when the parsed arguments have
     a truthy 'database' entry; otherwise ``get_lists_from_excel`` is called.
     The product URLs are then passed through ``basic.cut_string_field``
     with "&cat=" before being used as start URLs.
     """
     super(LydiasSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
     self.xml = VariantsXml()
     self.exc = ZmagsException(5)

     if not self.d['database']:
         self.get_lists_from_excel()
     else:
         self.database = Database()
         self.database.connect()
         catalog_id, product_id = self.d['catalog_id'], self.d['product_id']
         self.products, self.no_urls = self.database.select_products(
             catalog_id, product_id)
         self.database.disconnect()

     # fix for bug with links they provide
     raw_urls = self.products['urls']
     self.products['urls'] = basic.cut_string_field(raw_urls, "&cat=")
     self.handle_not_provided()
     self.start_urls = self.products['urls']
     self.images_store = "/" + settings['IMAGES_STORE']
     lydias.add_properties(self.xml)
     self.total = len(self.products['urls'])
コード例 #38
0
 def __init__(self, *a, **kw):
     """Set up the spider: close-signal hook, command-line arguments, users,
     error handler, run options, product lists and XML writer.

     Products are selected from the database (followed by
     ``change_url_list``) when the parsed arguments have a truthy 'database'
     entry; otherwise ``get_lists_from_excel`` is called.
     """
     super(PartyliteSpider, self).__init__(*a, **kw)
     dispatcher.connect(self.spider_closed, signals.spider_closed)
     self.d = PartyliteTerminal(sys.argv, self.name).get_arguments()
     self.images_store = "/" + settings['IMAGES_STORE']
     self.users = party.get_users(settings, self.d)
     self.exc = ZmagsException(50)

     # Run options copied from the parsed arguments.
     self.production = self.d['env']
     self.upload = self.d['upload']
     self.english = self.d['lang']
     self.file_name = self.d['file']

     if not self.d['database']:
         self.get_lists_from_excel()
     else:
         self.database = Database()
         self.database.connect()
         catalog_id, product_id = self.d['catalog_id'], self.d['product_id']
         self.products, self.no_urls = self.database.select_products(
             catalog_id, product_id)
         self.database.disconnect()
         self.change_url_list()

     self.xml = CommonXml()
     party.add_properties(self.xml)
     self.total = len(self.products['urls'])
コード例 #39
0
class SportmanSpider(CrawlSpider):
    """Spider that scrapes product pages for the "sportman" catalog.

    The product URLs to visit are loaded in ``__init__`` and the collected
    data is written out as XML when the spider closes.
    """

    # Spider name; also passed to the terminal, the XML writer and the log path.
    name = "sportman"
    allowed_domains = ["example.com"]
    # Replaced with the product URL list in __init__.
    start_urls = ["http://www.example.com"]
    # Number of responses handled by parse() so far.
    counter = 0

    def __init__(self, *a, **kw):
        """Read the run arguments and load the list of products to scrape.

        Products are selected from the database when the ``database``
        argument is set; otherwise ``get_lists_from_excel`` is used.
        """
        super(SportmanSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
        self.xml = CommonXml()
        self.exc = ZmagsException(5, "Sportmann")

        if not self.d["database"]:
            self.get_lists_from_excel()
        else:
            self.database = Database()
            self.database.connect()
            selection = self.database.select_products(self.d["catalog_id"], self.d["product_id"])
            self.products, self.no_urls = selection
            self.database.disconnect()
        self.add_properties(self.xml)
        product_urls = self.products["urls"]
        self.start_urls = product_urls
        self.images_store = "/" + settings["IMAGES_STORE"]
        self.total = len(product_urls)

    def parse(self, response):
        """Scrape one product page and add the result to the XML document.

        A redirected request is recorded as a NOT_AVAILABLE product. Returns
        the item on success (after a normal scrape it only carries
        ``image_urls``) and ``None`` when scraping the page failed.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = SportmanItem()
        redirected = "redirect_urls" in response.request.meta
        # The product lists are keyed by the URL that was originally requested,
        # i.e. the first URL of the redirect chain when there was a redirect.
        cur_url = response.request.meta["redirect_urls"][0] if redirected else response.url
        index = self.products["urls"].index(cur_url)
        try:
            if redirected:
                item["product_id"] = [self.products["product_ids"][index]]
                item["name"] = [self.products["names"][index]]
                item["in_stock"] = ["NOT_AVAILABLE"]
                self.exc.code_handler(102, response.url)
                self.xml.create_xml(item)
                self.products["status"][index] = "no_avail"
            else:
                (
                    item["name"],
                    item["short_desc"],
                    item["description"],
                    item["old_price"],
                    item["custom_price"],
                    item["product_id"],
                    item["sku"],
                ) = self.get_basic_info(hxs)
                item["in_stock"] = ["IN_STOCK"]
                # Only the first two of the eight values are needed here.
                viewstate, eventval = self.get_vars(response, hxs)[:2]

                # The view state is stored in six chunks: five of 2000
                # characters each and a sixth holding whatever remains.
                for number in range(1, 6):
                    start = (number - 1) * 2000
                    item["viewstate%d" % number] = [basic.cdata(viewstate[start:start + 2000])]
                item["viewstate6"] = [basic.cdata(viewstate[10000:])]
                item["eventval"] = [basic.cdata(eventval)]
                item["size_options"] = self.get_variants(hxs, response)

                images_url = self.get_images(hxs)
                # get_server_path() rewrites the list it is given, so keep a
                # copy of the raw URLs instead of parsing the page again.
                image_urls = list(images_url)
                item["normal_image_url"] = self.get_server_path(images_url)

                self.xml.create_xml(item)
                item.clear()
                item["image_urls"] = image_urls
                self.products["status"][index] = "ran"
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not mistaken for a scraping failure.
            self.exc.code_handler(100, response.url)
            self.products["status"][index] = "error"
        else:
            return item

    def get_basic_info(self, hxs):
        """Extract the basic product fields from the page.

        Returns the tuple ``(name, short_desc, description, old_price, price,
        product_id, sku)``. ``old_price`` is an empty list when the page has
        no old price; ``sku`` is the full list of article-number matches and
        ``product_id`` a one-element list holding the first of them.

        Raises IndexError when the description, the price or the article
        number cannot be found.
        """
        name = hxs.select('//div[@id="fragment-1"]/h2/text()').extract()

        short_desc = hxs.select('//div[@class="description2"]/text()').extract()

        description = hxs.select('//div[@id="fragment-1"]/div[@class="description"]').extract()
        description = [basic.cdata(sportman.delete_tags(re, description[0]))]

        old_price = hxs.select('//span[@class="oldprice"]/text()').extract()
        if old_price:
            old_price = self._parse_price(old_price)

        # Prefer the "now" price and fall back to the normal price.
        price = hxs.select('//span[@class="nowprice"]/text()').extract()
        if not price:
            price = hxs.select('//span[@class="normalprice"]/text()').extract()
        price = self._parse_price(price)

        article = " ".join(hxs.select('//div[@class="articlenumber"]').extract())
        article = basic.get_middle_text(article.replace(u"\xa0", ""), "Art.nr.", "</div>")
        sku = article
        product_id = [article[0]]

        return name, short_desc, description, old_price, price, product_id, sku

    def _parse_price(self, fragments):
        """Return ``[amount]``: the text after the first colon without "Kr" and spaces."""
        amount = " ".join(fragments).split(":")[1].replace("Kr", "")
        return [amount.replace(" ", "")]

    def get_vars(self, response, hxs):
        """Collect the ASP.NET form state values needed to post back to the page.

        Values are read twice: from the already downloaded page (``hxs``) and
        from a fresh download of ``response.url`` made with a fixed set of
        headers. The hidden script-manager field is fetched from the script
        URL found in that second page.

        Returns an 8-tuple ``(viewstate, eventvalidation, previouspage,
        hiddenfield, viewstate_page, eventvalidation_page, previouspage_page,
        hiddenfield_page)``; the third and fourth entries are always ``""``.

        Raises IndexError when one of the expected markers is missing.
        """
        headers1 = {
            "User-Agent": "Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1",
            "Host": "www.sportmann.no",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-us,en;q=0.5",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
            "Connection": "keep-alive",
            "Referer": "/product.aspx?productid=613232",
            "Cookie": "ASP.NET_SessionId=lurvsvrn3jxsfd45cedmsv45; Besok=922884e3-e9cb-4b69-b8c8-215f3cc988a9; __utma=184084580.1353376623.1312483243.1312483243.1312483243.1; __utmb=184084580.9.10.1312483243; __utmc=184084580; __utmz=184084580.1312483243.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
        }

        page = " ".join(hxs.select("//html").extract())

        viewst = basic.get_middle_text(page, 'id="__VIEWSTATE" value="', '"')
        eventval = basic.get_middle_text(page, 'id="__EVENTVALIDATION" value="', '"')
        prevpage = [""]
        hidden_field = [""]

        page_one = requests.get(response.url, headers=headers1).content

        viewst_page = basic.get_middle_text(page_one, 'id="__VIEWSTATE" value="', '"')
        eventval_page = basic.get_middle_text(page_one, 'id="__EVENTVALIDATION" value="', '"')
        prevpage_page = basic.get_middle_text(page_one, 'id="__PREVIOUSPAGE" value="', '"')

        # The script URL sits in the last "<script src" tag between the
        # __VIEWSTATE and __PREVIOUSPAGE inputs.
        between_inputs = page_one.split('id="__VIEWSTATE"')[1].split('id="__PREVIOUSPAGE"')[0]
        last_script = between_inputs.split("<script sr")[-1]
        script_path = basic.get_middle_text(last_script, 'c="', '"')[0]
        hidden_url = "http://www.sportmann.no" + script_path.replace("amp;", "")

        response_hidden = urllib2.urlopen(urllib2.Request(hidden_url))
        try:
            hidden_script = response_hidden.read()
        finally:
            # Release the connection even when reading fails.
            response_hidden.close()
        hidden_field_page = basic.get_middle_text(
            hidden_script, "ctl00_ScriptManager1_HiddenField').value += '", "';"
        )

        return (
            viewst[0],
            eventval[0],
            prevpage[0],
            hidden_field[0],
            viewst_page[0],
            eventval_page[0],
            prevpage_page[0],
            hidden_field_page[0],
        )

    def get_variants(self, hxs, response):
        """Collect the color/size variants of the product.

        Returns a list of ``basic.cdata``-wrapped JSON strings, each encoding
        a dict with the keys ``color``, ``color_value``, ``size`` and
        ``size_value``. When the page shows no size for the product, one
        entry per color value is produced by posting the color back to the
        page through ``get_data``.

        Raises IndexError when the expected color/size markup is missing.
        """
        page = hxs.select("//html").extract()
        page = " ".join(page)
        dict_one = {}
        test_one = []

        # Cut out the color block; if it contains "<select name" the split
        # yields more than one part, i.e. the colors are in a drop-down.
        temp = page.split('<div class="color">')
        temp = temp[1].split("</div>")
        temp = temp[0].split("<select name")

        # Only the four *_page values are used below (as get_data arguments).
        viewstate, eventvalidation, previouspage, hiddenfield, view_page, even_page, pre_page, hidd_page = self.get_vars(
            response, hxs
        )

        if len(temp) == 1:
            # No drop-down: the color is plain text and its value comes from a
            # hidden input. Both end up as single values, not lists.
            color = hxs.select('//div[@class="color"]/text()').extract()
            value = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant1Hidden"]/@value').extract()
            color[0] = color[0].replace("  ", "")
            color = basic.clean_string(color[0])
            value = value[0]

        else:
            # Drop-down: color names and values are lists read from the options.
            test_color = basic.get_middle_text(temp[1], "farge</option>", "</select>")
            color = basic.get_middle_text(test_color[0], '">', "</option>")
            value = basic.get_middle_text(test_color[0], 'value="', '">')

            for i in range(0, len(color)):
                color[i] = color[i].replace("  ", "")

        # Same approach for the size block.
        size_temp = page.split('<div class="size">')
        size_temp = size_temp[1].split("</div>")
        size_temp = size_temp[0].split("<select name")

        if len(size_temp) == 1:
            size = hxs.select('//div[@class="size"]/text()').extract()
            size = basic.clean_string(size[0])
            size = [size.replace("   ", "")]

            size_val = hxs.select('//input[@id="ctl00_ContentPlaceHolder1_Variant2Hidden"]/@value').extract()

            if size[0] == "":
                # No size on the page: request the sizes for each color value.
                # NOTE(review): after the single-color branch above `color` and
                # `value` are single values rather than lists, so this loop
                # would index them element by element (character by character
                # if they are strings) -- confirm this is intended.
                for i in range(len(value)):
                    resp_page = self.get_data(response, hidd_page, view_page, pre_page, even_page, value[i])

                    a_page = resp_page.split('<div class="siz')
                    a_page = a_page[1].split("</select>")

                    if len(a_page) == 1:

                        # No size drop-down in the returned fragment.
                        size = basic.get_middle_text(a_page[0], 'e">', '<input type="hidden"')
                        size_val = basic.get_middle_text(a_page[0], 'value="', '"')
                        size_val = size_val[0]
                        size_val = [size_val]

                    else:
                        a_page = basic.get_middle_text(a_page[0], "se</option>", "</select>")
                        size = basic.get_middle_text(a_page[0], '">', "</option>")
                        size_val = basic.get_middle_text(a_page[0], 'value="', '">')

                    dict_one["color"] = color[i]
                    dict_one["color_value"] = value[i]
                    dict_one["size_value"] = size_val

                    for x in range(0, len(size)):
                        size[x] = basic.clean_string(size[x])
                        size[x] = size[x].replace("   ", "")

                        # Set inside the loop, so with an empty `size` list the
                        # key keeps whatever an earlier iteration stored.
                        dict_one["size"] = size

                    test_one.append(basic.cdata(json.dumps(dict_one)))

            else:
                dict_one["color"] = color

                dict_one["color_value"] = value
                dict_one["size"] = size
                dict_one["size_value"] = size_val
                # NOTE(review): this branch serializes with simplejson while
                # the other two use json.
                test_one.append(basic.cdata(simplejson.dumps(dict_one)))

        else:
            # Size drop-down present on the page itself.
            test_size = basic.get_middle_text(size_temp[1], "se</option>", "</select>")
            size = basic.get_middle_text(test_size[0], '">', "</option>")
            size_val = basic.get_middle_text(test_size[0], 'value="', '">')

            for x in range(0, len(size)):
                size[x] = basic.clean_string(size[x])
                size[x] = size[x].replace("   ", "")

            dict_one["color"] = color
            dict_one["color_value"] = value
            dict_one["size"] = size
            dict_one["size_value"] = size_val

            test_one.append(basic.cdata(json.dumps(dict_one)))

        return test_one

    def get_server_path(self, url):
        """Map image URLs to their paths in the image store.

        Every entry of the list ``url`` is replaced in place by its
        ``basic.clean_string`` form. The returned list holds one
        ``<images_store>/full/<sha1 of the cleaned url>.jpg`` path per entry.
        """
        server_paths = []
        for position, raw_url in enumerate(url):
            cleaned = basic.clean_string(raw_url)
            url[position] = cleaned
            digest = hashlib.sha1(cleaned).hexdigest()
            server_paths.append(self.images_store + "/full/" + digest + ".jpg")
        return server_paths

    def get_images(self, hxs):
        """Return the URLs of the gallery images, prefixed with the site host.

        Raises IndexError when the gallery marker is not on the page.
        """
        html = " ".join(hxs.select("//html").extract())

        # Keep only the markup between the gallery and the right-hand container.
        gallery = html.split('class="gallery_demo_unstyled"')[1]
        gallery = gallery.split('<div class="right_container">')[0]
        sources = basic.get_middle_text(gallery, 'src="', '"')

        return ["http://www.sportmann.no" + sources[i] for i in range(0, len(sources))]

    def get_data(self, response, hidden, viewstate, previouspage, eventvalidation, colorvalue):
        """POST the variant drop-down selection back to the page and return the reply.

        The form body is a fixed template into which the URL-encoded
        ``hidden``, ``viewstate``, ``previouspage`` and ``eventvalidation``
        values are inserted. ``colorvalue`` is inserted as given, without
        URL-encoding.

        Returns the raw response body read from ``response.url``.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0",
            "Host": "www.sportmann.no",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-us,en;q=0.5",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
            "Connection": "keep-alive",
            "Referer": "http://www.sportmann.no/product.aspx?productid=613232",
            "Cookie": "",
        }

        eventvalidation = urllib.urlencode({"__EVENTVALIDATION": eventvalidation})
        viewstate = urllib.urlencode({"__VIEWSTATE": viewstate})
        previouspage = urllib.urlencode({"__PREVIOUSPAGE": previouspage})
        hidden = urllib.urlencode({"ctl00_ScriptManager1_HiddenField": hidden})

        data = (
            "ctl00%24ScriptManager1=ctl00%24ContentPlaceHolder1%24dropdownPanel%7Cctl00%24ContentPlaceHolder1%24ddlVariant&"
            + hidden
            + "%3B%3BAjaxControlToolkit%2C%20Version%3D3.0.20820.16598%2C%20Culture%3Dneutral%2C%20PublicKeyToken%3D28f01b0e84b6d53e%3Aen-US%3A707835dd-fa4b-41d1-89e7-6df5d518ffb5%3Ae2e86ef9%3A1df13a87%3A8ccd9c1b%3A9ea3f0e2%3A9e8e87e9%3A4c9865be%3Aba594826%3A757f92c2%3Ac7c04611%3Acd120801%3Ac4c00916%3A3858419b%3A96741c43%3A38ec41c0%3B%3BAjaxControlToolkit%2C%20Version%3D3.0.20820.16598%2C%20Culture%3Dneutral%2C%20PublicKeyToken%3D28f01b0e84b6d53e%3Aen-US%3A707835dd-fa4b-41d1-89e7-6df5d518ffb5%3Ae2e86ef9%3A1df13a87%3A8ccd9c1b%3A9ea3f0e2%3A9e8e87e9%3A4c9865be%3Aba594826%3A757f92c2%3Ac7c04611%3Acd120801%3Ac4c00916%3A3858419b%3A96741c43%3A38ec41c0%3B%3BAjaxControlToolkit%2C%20Version%3D3.0.20820.16598%2C%20Culture%3Dneutral%2C%20PublicKeyToken%3D28f01b0e84b6d53e%3Aen-US%3A707835dd-fa4b-41d1-89e7-6df5d518ffb5%3Ae2e86ef9%3A1df13a87%3A8ccd9c1b%3A9ea3f0e2%3A9e8e87e9%3A4c9865be%3Aba594826%3A757f92c2%3Ac7c04611%3Acd120801%3Ac4c00916%3A3858419b%3A96741c43%3A38ec41c0&__EVENTTARGET=ctl00%24ContentPlaceHolder1%24ddlVariant&__EVENTARGUMENT=&__LASTFOCUS=&"
            + viewstate
            + "&"
            + previouspage
            + "&"
            + eventvalidation
            + "&ctl00%24ProductSearch%24txtProdSearch=&ctl00%24ProductSearch%24TextBoxWatermarkProdSearch_ClientState=&ctl00%24ContentPlaceHolder1%24ddlVariant="
            + colorvalue
            + "&ctl00%24ContentPlaceHolder1%24Variant1Hidden=&ctl00%24ContentPlaceHolder1%24Variant2Hidden=&ctl00%24ContentPlaceHolder1%24tbAmount=1&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24txtFriendsName=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24vceFriendsName_ClientState=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24txtFriendsEmail=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24vceFriendsEmail_ClientState=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24txtYourName=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24vceYourName_ClientState=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24txtYourEmail=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24vceYourEmail_ClientState=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24txtComment=&ctl00%24ContentPlaceHolder1%24modTellFriend%24tellFriend%24vceComment_ClientState=&__ASYNCPOST=true&"
        )

        # Passing a data argument makes urllib2 send the request as a POST.
        req = urllib2.Request(response.url, data, headers)

        reply = urllib2.urlopen(req)
        try:
            resp_page = reply.read()
        finally:
            # Release the connection even when reading fails.
            reply.close()

        return resp_page

    def spider_closed(self, spider):
        """Handle the spider_closed signal at the end of scraping.

        Writes the XML file, updates the database (when run from the
        interface), optionally uploads the XML, mails the run report and, in
        database mode, stores the report under ``logs/<spider name>/``.
        """
        msg = "Ran: {0}".format(datetime.now())
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped %d product out of %d\n\n" % (self.counter, self.total)
        # filename for writing xml
        if self.d["database"]:
            # Fall back to the spider name so the XML, mail and log steps
            # below still have a file name when the database lookup fails
            # (previously that case ended in a NameError).
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d["catalog_id"])
                self.database.update_db(self.products)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d["file"]
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        if self.d["upload"]:
            exp = CommonExport()
            try:
                exp.xml_to_db(self.name, filename, "1ccd39a5-af4e-47cc-aebe-e0dede5b14d8")
                msg += "\n\nExport to database successful"
            except StandardError:
                msg += "\n\nExport to database failed"
        else:
            msg += "\n\nUpload to database not selected"
        from modules.mail import Mail

        mail = Mail()
        try:
            mail.send_mail(msg, "Sportmann: {0}".format(filename))
            if self.d["email"]:
                mail.send_mail(msg, "Sportmann: {0}".format(filename), self.d["email"])
        except Exception:
            # Best effort: a mail failure is only noted in the log file below.
            msg += "\nSending mail failed."
        if self.d["database"]:
            path = "logs/{0}".format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), "w") as f:
                f.write(msg)

    def get_lists_from_excel(self):
        """Fill ``self.products`` and ``self.no_urls`` from the excel file ``self.d["file"]``.

        Read errors are reported through ``self.exc.code_handler`` with code
        103; the post-processing steps run either way.
        """
        workbook = DictExcel(basic.get_excel_path(self.name, self.d["file"]))
        self.products = {}
        try:
            self.products["urls"] = workbook.read_excel_collumn_for_urls(3, 15)
            self.products["product_ids"] = workbook.read_excel_collumn_for_ids(1, 15)
            self.products["names"] = workbook.read_excel_collumn(2, 15)
        except IOError as e:
            report = "I/O error {0}: {1}".format(e.errno, e.strerror) + "\nError occurred for given file: {0}".format(
                self.d["file"]
            )
            self.exc.code_handler(103, msg=report)
        except StandardError:
            report = "Error reading excel file" + "\nError occurred for given file: {0}".format(self.d["file"])
            self.exc.code_handler(103, msg=report)
        # Drop duplicates, split off the rows without a URL, then give both
        # groups their initial status.
        self.products = workbook.delete_duplicates_dict(self.products)
        self.products, self.no_urls = workbook.separate_no_urls(self.products)
        self.products = workbook._add_none_status(self.products)
        self.no_urls = workbook._add_none_status(self.no_urls)

    def add_properties(self, xml):
        """Register this spider's custom properties on ``xml``.

        Each entry is passed to ``xml.add_property`` as (key, label, type),
        in the order listed.
        """
        properties = (
            ("short_desc", "Short Description", "text"),
            ("old_price", "Old Price", "text"),
            ("custom_price", "New Price", "text"),
            ("color_value", "Color Value", "text"),
            ("in_stock", "In Stock", "text"),
            ("size_val", "Size Value", "text_list"),
            ("sku", "Sku", "text"),
            ("size_options", "Size_options", "text_list"),
            ("viewstate1", "Viewstate1", "text_list"),
            ("viewstate2", "Viewstate2", "text_list"),
            ("viewstate3", "Viewstate3", "text_list"),
            ("viewstate4", "Viewstate4", "text_list"),
            ("viewstate5", "Viewstate5", "text_list"),
            ("viewstate6", "Viewstate6", "text_list"),
            ("eventval", "Eventval", "text_list"),
            ("hidden", "Hidden Field", "text_list"),
            ("prevpage", "Previous Page", "text_list"),
            ("recommended_product", "Recommended Product", "text_list"),
        )
        for key, label, kind in properties:
            xml.add_property(key, label, kind)
コード例 #40
0
ファイル: user.py プロジェクト: dar1enyang/Currency-Reminder
 def save_to_db(self):
     """Insert this object's ``json()`` representation into the "users" collection."""
     document = self.json()
     Database.insert(collection="users", data=document)
コード例 #41
0
ファイル: playsound.py プロジェクト: choigit/kritzbot
import json
import time
from threading import Thread
from modules.api import API
from modules.database import Database
from modules.config import *
import os.path
import sys

# Module-level database handle shared by PlaySound below. The db_* settings
# are presumably supplied by the star import from modules.config -- TODO confirm.
database = Database(db_host, db_user, db_pass, db_name, db_autocommit)
# Runs at import time.
database.database_connection()

class PlaySound:
	"""Lets a user spend points to queue an audio clip."""

	def __init__(self, user, cost):
		"""Remember the paying ``user`` and the ``cost`` in points of one clip."""
		self.user = user
		self.api = API(1)
		self.cost = cost

	def playsound(self, sound):
		"""Queue ``sound`` and charge the user if they have enough points.

		Returns the chat message describing the outcome.
		"""
		# NOTE(review): strict ">" means a user holding exactly `cost` points
		# is refused -- confirm whether ">=" was intended.
		if(database.db_get_user_points_int(self.user) > self.cost):
			self.add_sound_to_queue(sound, self.get_filepath())
			database.db_minus_points_user(self.user, self.cost)
			PlaySoundTimerRun()
			return "{} just spent {} points on an audio clip!".format(self.user, self.cost)
		else:
			return "{}, you don't have enough points FailFish".format(self.user)

	def get_filepath(self):
		# NOTE(review): the listing ends here; as shown this method only
		# computes the module's directory and returns None.
		basepath = os.path.dirname(__file__)
コード例 #42
0
ファイル: user.py プロジェクト: dar1enyang/Currency-Reminder
 def find_user_data(email):
     """Return the result of looking up one "users" entry by ``email``."""
     by_email = {"email": email}
     return Database.find_one(collection="users", query=by_email)
コード例 #43
0
ファイル: user.py プロジェクト: dar1enyang/Currency-Reminder
 def update_user_email(old_email, email):
     """Set the ``email`` field to ``email`` on the "users" entry matching ``old_email``."""
     selector = {"email": old_email}
     change = {"$set": {"email": email}}
     Database.update(collection="users", query=selector, data=change)
コード例 #44
0
 def save_to_db(self):
     """Insert this object's ``json()`` representation into the "all_alert" collection."""
     document = self.json()
     Database.insert(collection="all_alert", data=document)
コード例 #45
0
class GuitarCenterSpider(CrawlSpider):
    """Spider that scrapes product pages for the "guitar_center" catalog.

    The product URLs to visit are loaded in ``__init__`` and the collected
    data is written out as XML when the spider closes.
    """

    # Spider name; also passed to the terminal, the XML writer and the log path.
    name = "guitar_center"
    allowed_domains = ["musiciansfriend.com"]
    # Replaced with the product URL list in __init__.
    start_urls = ["http://www.musiciansfriend.com"]
    # Number of responses handled by parse() so far.
    counter = 0

    def __init__(self, *a, **kw):
        """Read the run arguments and load the list of products to scrape.

        Products are selected from the database when the ``database``
        argument is set; otherwise ``get_lists_from_excel`` is used. Products
        without a URL are written to the XML straight away.
        """
        super(GuitarCenterSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.d = DatabaseTerminal(sys.argv, self.name).get_arguments()
        self.xml = CommonXml()
        self.exc = ZmagsException(5)
        if not self.d["database"]:
            self.get_lists_from_excel()
        else:
            self.database = Database()
            self.database.connect()
            selection = self.database.select_products(self.d["catalog_id"], self.d["product_id"])
            self.products, self.no_urls = selection
            self.database.disconnect()
        self.add_properties(self.xml)
        self.handle_not_provided()
        product_urls = self.products["urls"]
        self.start_urls = product_urls
        self.total = len(product_urls)

    def parse(self, response):
        """Scrape one product page, add it to the XML document and return the item.

        On a StandardError the product is marked as "error" and the partially
        filled item is returned without being written to the XML.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = GuitarCenterItem()
        # NOTE(review): settings is not used anywhere in this method.
        from scrapy.conf import settings

        meta = response.request.meta
        # The product lists are keyed by the URL that was originally requested.
        cur_url = meta["redirect_urls"][0] if "redirect_urls" in meta else response.url
        position = self.products["urls"].index(cur_url)
        try:
            item["product_id"] = [self.products["product_ids"][position]]
            item["name"], item["brand"] = self.get_basic_info(hxs)
            (
                item["heading"],
                item["details"],
                item["specs"],
                item["call_to_action"],
            ) = self.get_description(hxs)
            (
                item["brand_image"],
                item["brand_image_promo"],
                brand_images,
            ) = self.get_description_images(hxs)
            item["old_price"], item["discount"], item["price"] = self.get_prices(hxs)
            item["image_json"], img = self.get_images(hxs)
            item["serial"] = self.get_serials(hxs)
            item["warranty"] = self.gold_coverage(hxs)
            item["in_stock"] = self.get_available(hxs)
            item["product_ref"], item["add_to_cart_id"] = self.get_add_to_cart(hxs)
            # Without an add-to-cart id the product is reported as unavailable.
            if not item["add_to_cart_id"]:
                item["in_stock"] = ["NOT_AVAILABLE"]
            item["shipping"] = self.get_shipping(hxs)
            item["colors"] = self.get_colors(hxs)
            self.products["status"][position] = "ran"
        except StandardError:
            self.products["status"][position] = "error"
            self.exc.code_handler(100, response.url)
        else:
            self.xml.create_xml(item)
            item["image_urls"] = img + brand_images
        return item

    def handle_not_provided(self):
        """Write a NOT_AVAILABLE entry to the XML for every product in ``self.no_urls``."""
        item = GuitarCenterItem()
        # enumerate() pairs each id with the name at the same position;
        # list.index() always returned the first match, so duplicate ids got
        # the wrong name, and it rescanned the list for every product.
        for index, product_id in enumerate(self.no_urls["product_ids"]):
            item["product_id"] = [product_id]
            item["name"] = [self.no_urls["names"][index]]
            item["in_stock"] = ["NOT_AVAILABLE"]
            self.xml.create_xml(item)

    def get_basic_info(self, hxs):
        """Return ``(name, brand)``.

        ``name`` is a one-element list with the cleaned heading text, without
        non-breaking spaces; ``brand`` is the list of extracted brand texts.
        """
        title_parts = hxs.select('//h1[@class="fn"]/text()').extract()
        title = basic.clean_string("".join(title_parts))
        brand = hxs.select('//span[@class="brand"]/text()').extract()
        return [title.replace(u"\xa0", "")], brand

    def get_description_images(self, hxs):
        """Return ``(brand_image, brand_image_promo, images)``.

        The first two are lists holding at most the server path of the first
        matching image; ``images`` is every extracted source URL of both kinds.
        """
        logo = hxs.select('//a[@class="brandImage"]/img/@src').extract()
        promo_logo = hxs.select('//div[@class="brandPromoLogo"]/img/@src').extract()
        source_urls = logo + promo_logo
        logo = [self.get_server_path(logo[0])] if logo else logo
        promo_logo = [self.get_server_path(promo_logo[0])] if promo_logo else promo_logo
        return logo, promo_logo, source_urls

    def get_description(self, hxs):
        """Return ``(heading, details, specs, call_to_action)``, each passed through ``basic.cdata_field``."""
        xpaths = (
            '//div[@id="description"]/p',
            '//p[@class="description"]',
            '//div[@class="specs"]/ul',
            '//div[@class="callToAction"]/p/text()',
        )
        extracted = [hxs.select(xpath).extract() for xpath in xpaths]
        return tuple(basic.cdata_field(field) for field in extracted)

    # Gets the prices: returns the old price, the discount (the new price) and the price;
    # each is an empty list when the page has no value for it.
    def get_prices(self, hxs):
        """Return ``(old_price, discount, price)`` as lists cleaned by ``clean_price``.

        ``old_price`` is the first line-item value and is only set when the
        list has more than one label; ``discount`` is the last line-item
        value. When none of the three is found, the price is read from the
        inline list instead.
        """
        tag = hxs.select('//dl[@class="lineItemList"]/dt/text()').extract()
        value = hxs.select('//dl[@class="lineItemList"]/dd/text()').extract()
        old_price = []
        discount = []
        if len(tag) > 1:
            old_price = [basic.clean_string(value[0])]
        try:
            discount = [basic.clean_string(value[-1])]
        except IndexError:
            # Parenthesized single-argument form prints the same text under
            # both Python 2 and Python 3.
            print("This product has no price.")
        # extract() returns a (possibly empty) list, so no IndexError handler
        # is needed here.
        price = hxs.select('//span[@class="topAlignedPrice"]/text()').extract()
        if not old_price and not discount and not price:
            price = hxs.select('//dl[@class="inlineList"]/dd/text()').extract()
        return self.clean_price(old_price), self.clean_price(discount), self.clean_price(price)

    # returning json with image url and serial number of product image refers to
    def get_images(self, hxs):
        """Return ``(images_list, img)`` for the product thumbnails.

        ``images_list`` holds one CDATA-wrapped JSON string per thumbnail with
        the keys ``image_url`` and ``product_serial`` (the list item's class
        with "site1sku" removed); ``img`` holds the thumbnail links.

        Raises IndexError when there are fewer class attributes than links.
        """
        hrefs = hxs.select('//ul[@id="prodDetailThumbs"]/li/a/@href').extract()
        classes = hxs.select('//ul[@id="prodDetailThumbs"]/li/@class').extract()
        images_json = []
        links = []
        for position, href in enumerate(hrefs):
            entry = {}
            entry["image_url"] = self.get_server_path(href)
            links.append(href)
            # replace() leaves the class untouched when the marker is absent.
            entry["product_serial"] = classes[position].replace("site1sku", "")
            images_json.append(basic.cdata(simplejson.dumps(entry)))
        return images_json, links

    # function for getting serials and all information about them, currently returns field with jsons with all
    # information, can be modified to return dicts if needed for subproducts for those one day
    def get_serials(self, hxs):
        """Return the hidden ``styleInfo`` JSON blobs, re-serialized and CDATA-wrapped."""
        raw_blobs = hxs.select('//var[@class="hidden styleInfo"]/text()').extract()
        # Loading and dumping again normalizes each blob through simplejson.
        return [basic.cdata(simplejson.dumps(simplejson.loads(raw))) for raw in raw_blobs]

    def get_server_path(self, url):
        """Return the path under which the image at ``url`` is referenced.

        The absolute URL from the source site is kept as is. The previous
        code had an unreachable second return that built a local path as
        ``IMAGES_STORE + "/full/" + <sha1 of url> + ".jpg"``; restore that
        expression here if images should point at the local store instead.
        """
        return url

    # function for getting gold coverage from the page which is actually additional warranty options
    def gold_coverage(self, hxs):
        """Return the gold coverage options as CDATA-wrapped JSON strings.

        Each entry encodes ``{"id": <checkbox value>, "name": <label text>}``.
        Raises IndexError when there are fewer labels than checkboxes.
        """
        option_ids = hxs.select('//div[@class="goldCoverage"]/input[@type="checkbox"]/@value').extract()
        labels = hxs.select('//div[@class="goldCoverage"]/label/text()').extract()
        options = []
        for position, option_id in enumerate(option_ids):
            option = {}
            option["id"] = option_id
            option["name"] = labels[position]
            options.append(basic.cdata(simplejson.dumps(option)))
        return options

    # function for getting availability
    def get_available(self, hxs):
        """Return the availability of the product as a list.

        ``["IN_STOCK"]`` when the page has no availability marker or the
        marker is "in_stock"; any other marker list is returned unchanged.
        """
        status = hxs.select('//var[@class="hidden availability"]/text()').extract()
        if not status:
            # Products with color options carry the stock status per option
            # and have no marker of their own, so report them as in stock.
            return ["IN_STOCK"]
        if status[0] == "in_stock":
            return [status[0].upper()]
        return status

    # function for getting add to cart id and product reference
    def get_add_to_cart(self, hxs):
        """Return ``(product_ref, add_to_cart_id)`` as one-element lists.

        Both come from the "|"-separated ``data-rel`` attribute of the
        add-to-list link. Returns ``([], [])`` when the page has no such link.

        Raises IndexError when the attribute contains no "|".
        """
        try:
            temp = hxs.select('//span[@class="magicLink addToList"]/@data-rel').extract()[0]
        except IndexError:
            # No add-to-list link on the page. Only this expected case is
            # handled; the bare except used to hide every other error too.
            print("Product not available")
            return [], []
        parts = temp.split("|")
        return [parts[0]], [parts[1]]

    # function for getting shipping information
    def get_shipping(self, hxs):
        """Return the list of texts extracted from the shipping block of the page."""
        shipping_texts = hxs.select('//div[@id="targeter_pdpShipping"]/span/text()')
        return shipping_texts.extract()

    # function for getting colors, return jsons with all the data about options
    def get_colors(self, hxs):
        """Return the ``styleInfo`` JSON blobs, re-serialized and CDATA-wrapped."""
        raw_blobs = hxs.select('//var[@class="styleInfo"]/text()').extract()
        # Loading and dumping again normalizes each blob through simplejson.
        return [basic.cdata(simplejson.dumps(simplejson.loads(raw))) for raw in raw_blobs]

    # cleaning price to leave only numbers
    def clean_price(self, price):
        """Return the entries of ``price`` with everything but digits and dots removed."""
        return [re.sub("[^0-9.]", "", raw) for raw in price]

    def spider_closed(self, spider):
        """Handle the spider_closed signal at the end of scraping.

        Writes the XML file, updates the database (when run from the
        interface), optionally uploads the XML, mails the run report and, in
        database mode, stores the report under ``logs/<spider name>/``.
        """
        msg = "Ran: {0}".format(datetime.now())
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped %d product out of %d\n\n" % (self.counter, self.total)
        # filename for writing xml
        if self.d["database"]:
            # Fall back to the spider name so the XML, mail and log steps
            # below still have a file name when the database lookup fails
            # (previously that case ended in a NameError).
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d["catalog_id"])
                self.database.update_db(self.products)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d["file"]
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        if self.d["upload"]:
            exp = CommonExport()
            try:
                exp.xml_to_db(self.name, filename, "4a9f5955-9b8e-4e13-84ef-95f937dbc00d")
                msg += "\n\nExport to database successful"
            except StandardError:
                msg += "\n\nExport to database failed"
        else:
            msg += "\n\nUpload to database not selected"
        from modules.mail import Mail

        mail = Mail()
        try:
            mail.send_mail(msg, "GuitarCenter: {0}".format(filename))
            if self.d["email"]:
                mail.send_mail(msg, "GuitarCenter: {0}".format(filename), self.d["email"])
        except Exception:
            # Best effort: a mail failure is only noted in the log file below.
            msg += "\nSending mail failed."
        if self.d["database"]:
            path = "logs/{0}".format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), "w") as f:
                f.write(msg)

    def add_properties(self, xml):
        """Register this spider's custom properties on ``xml``.

        Each entry is passed to ``xml.add_property`` as (key, label, type),
        in the order listed.
        """
        properties = (
            ("old_price", "Old Price", "decimal"),
            ("image_json", "Image Json", "text_list"),
            ("discount", "Discount", "decimal"),
            ("product_ref", "Product Ref.", "text"),
            ("in_stock", "In Stock", "text"),
            ("serial", "Serial", "text_list"),
            ("colors", "Colors", "text_list"),
            ("add_to_cart_id", "Add To Cart ID", "text"),
            ("shipping", "Shipping", "text"),
            ("warranty", "Warranty", "text_list"),
            ("heading", "Heading", "text"),
            ("details", "Details", "text"),
            ("specs", "Specs", "text"),
            ("call_to_action", "Call To Action", "text"),
            ("brand_image", "Brand Image", "text"),
            ("brand_image_promo", "Brand Image Promo", "text"),
        )
        for key, label, kind in properties:
            xml.add_property(key, label, kind)

    def get_lists_from_excel(self):
        """Fill ``self.products`` and ``self.no_urls`` from the excel file ``self.d["file"]``.

        Read errors are reported through ``self.exc.code_handler`` with code
        103; the post-processing steps run either way.
        """
        workbook = DictExcel(basic.get_excel_path(self.name, self.d["file"]))
        self.products = {}
        try:
            self.products["urls"] = workbook.read_excel_collumn_for_urls(3, 15)
            self.products["product_ids"] = workbook.read_excel_collumn_for_ids(1, 15)
            self.products["names"] = workbook.read_excel_collumn(2, 15)
        except IOError as e:
            report = "I/O error {0}: {1}".format(e.errno, e.strerror) + "\nError occurred for given file: {0}".format(
                self.d["file"]
            )
            self.exc.code_handler(103, msg=report)
        except StandardError:
            report = "Error reading excel file" + "\nError occurred for given file: {0}".format(self.d["file"])
            self.exc.code_handler(103, msg=report)
        # Drop duplicates, split off the rows without a URL, then give both
        # groups their initial status.
        self.products = workbook.delete_duplicates_dict(self.products)
        self.products, self.no_urls = workbook.separate_no_urls(self.products)
        self.products = workbook._add_none_status(self.products)
        self.no_urls = workbook._add_none_status(self.no_urls)
コード例 #46
0
 def find_user_alert(email, rate_exchange):
     """Return the "all_alert" entries matching ``email`` and ``rate_exchange``."""
     criteria = {"email": email, "rate_exchange": rate_exchange}
     return Database.find(collection="all_alert", query=criteria)
コード例 #47
0
 def delete_user_alert(email, currency, rate_exchange):
     """Remove the "all_alert" entries matching ``email``, ``currency`` and ``rate_exchange``."""
     criteria = {"email": email, "currency": currency, "rate_exchange": rate_exchange}
     Database.remove(collection="all_alert", query=criteria)
コード例 #48
0
 def update_user_alert(email, currency, rate_exchange, price):
     """Set ``price`` on the "all_alert" entry matching ``email``, ``currency`` and ``rate_exchange``."""
     criteria = {"email": email, "currency": currency, "rate_exchange": rate_exchange}
     change = {"$set": {"price": price}}
     Database.update(collection="all_alert", query=criteria, data=change)
コード例 #49
0
class PartyliteSpider(CrawlSpider):
    name = "partylite"
    allowed_domains = ["partylite.biz"]
    start_urls = ["http://www.zmags.com"]
    counter = 0

    def __init__(self, *a, **kw):
        """Prepare the spider: read arguments, load products, set up the xml.

        Products come from the database when the ``database`` argument is
        set, otherwise from the excel sheet.  Statement order matters:
        ``self.users`` and ``self.exc`` must exist before the product lists
        are loaded, because both loaders use them.
        """
        super(PartyliteSpider, self).__init__(*a, **kw)
        # Have spider_closed run when scrapy emits the spider_closed signal.
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        terminal = PartyliteTerminal(sys.argv, self.name)
        # Arguments as returned by the terminal helper; read by key below.
        self.d = terminal.get_arguments()
        self.images_store = "/" + settings['IMAGES_STORE']
        self.users = party.get_users(settings, self.d)
        self.exc = ZmagsException(50)
        self.production = self.d['env']
        self.upload = self.d['upload']
        self.english = self.d['lang']
        self.file_name = self.d['file']
        if self.d['database']:
            self.database = Database()
            self.database.connect()
            self.products, self.no_urls = self.database.select_products(self.d['catalog_id'],
                                                                        self.d['product_id'])
            self.database.disconnect()
            # Urls from the database carry placeholders that are filled in here.
            self.change_url_list()
        else:
            self.get_lists_from_excel()
        self.xml = CommonXml()
        party.add_properties(self.xml)
        # Number of products to scrape, used for progress and the final report.
        self.total = len(self.products['urls'])

    def parse(self, response):
        """Yield one request per product url for the configured language.

        For ``us`` the url is requested unchanged.  For ``english`` and
        ``french`` the us account in the url is swapped for the matching
        canadian account and the language tag is stored in the request
        meta.  Any other language value yields nothing.
        """
        # language argument -> (key of the account in self.users, meta tag)
        canadian = {
            'english': ('canada_en', "eng"),
            'french': ('canada_fr', "fr"),
        }
        for url in self.products['urls']:
            lang = self.d['lang']
            if lang == 'us':
                yield Request(url, callback=self.parse_can, dont_filter=True)
            elif lang in canadian:
                account_key, tag = canadian[lang]
                localized = url.replace(self.users['us'], self.users[account_key])
                request = Request(localized, callback=self.parse_can, dont_filter=True)
                request.meta['language'] = tag
                yield request

    def change_url_list(self):
        for i in range(0, len(self.products['urls'])):
            if not self.production:
                self.products['urls'][i] = self.products['urls'][i].replace('www', 'qa')
            self.products['urls'][i] = self.products['urls'][i].replace('XXXXX', self.users['us'])

    def get_in_stock(self, hxs):
        """Return the stock flag for the product page.

        The product counts as in stock when the page has no
        ``availability_container`` div.
        """
        availability = hxs.select('//div[@id="availability_container"]').extract()
        return ["NOT_IN_STOCK"] if availability else ["IN_STOCK"]

    def get_basic_info(self, hxs):
        """Return the product name and the "shown with" block.

        Both values are passed through the cdata helpers when present and
        returned as the empty extraction result otherwise.
        """
        name = hxs.select('//div[@id="product_name"]/text()').extract()
        name = basic.cdata_field(name) if name else name
        shown_with = hxs.select('//div[@id="shown_with_container"]').extract()
        shown_with = [basic.cdata(shown_with[0])] if shown_with else shown_with
        return name, shown_with

    def get_description(self, hxs):
        """Return the item description as a one-element list.

        Tags are removed, the text is wrapped by the cdata helper and the
        fraction slash (U+2044) is replaced with a plain ``/``.
        """
        blocks = hxs.select('//div[@id="item_description"]').extract()
        wrapped = basic.cdata(basic.remove_tags(blocks[0]))
        return [wrapped.replace(u"\u2044", "/")]

    def get_price(self, hxs):
        """Return ``([price], discount)`` for the product page.

        The price is taken from the first location that yields text; the
        "Price:" / "Prix:" labels are dropped before wrapping.  The
        discount is wrapped only when a sale price is present.
        """
        price_locations = (
            '//span[@id="divUnitPrice"]/text()',
            '//div[@id="product_price"]/span[1]/text()',
            '//div[@id="product_price"]/text()',
        )
        found = []
        for location in price_locations:
            found = hxs.select(location).extract()
            if found:
                break
        discount = hxs.select('//div[@id="product_price"]/span[@class="pc-salePrice"]/text()').extract()
        text = basic.clean_string(found[0])
        text = re.sub(" +", " ", text)
        for label in ("Price:", "Prix:"):
            text = text.replace(label, "")
        price = basic.cdata(text.strip())
        if discount:
            discount = basic.cdata_field(discount)
        return [price], discount

    def get_add_to_cart_id(self, page):
        """Extract the add-to-cart id(s) from the page javascript.

        Looks inside the ``isOrderStarted`` branch for the first argument
        of ``addItemToCart(`` and returns the helper's result as is.
        """
        order_branch = basic.get_middle_text(page, "if(isOrderStarted){", "}else")[0]
        return basic.get_middle_text(order_branch, "addItemToCart(", ",")

    def create_subproducts(self, page):
        """Build the color variants of a product from the page javascript.

        Returns a list of dicts, one per ``ProductGroupProduct(`` entry,
        or an empty list when the page has no ``largeImages`` script.
        """
        pieces = page.split("var largeImages = new Array();")
        if len(pieces) < 2:
            print("This product has no images")
            return []
        script = pieces[1].split("colorDropdownArray")[0]
        entries = basic.get_middle_text(script, "ProductGroupProduct(", ");")
        swatch_names = self.get_image_names(page)
        clean = self.custom_clean_string
        variants = []
        for entry in entries:
            # Fields of the javascript constructor call, addressed by position.
            fields = entry.split("',")
            variant = {}
            variant['normal_image_url'] = (
                "http://qa.partylite.biz/imaging/resize?fileName=/productcatalog/production"
                + clean(fields[26], True)
            )
            variant['description'] = basic.cdata(clean(fields[27]))
            variant['color_id'] = clean(fields[7], True)
            variant['swatch_color'] = basic.cdata(clean(fields[9]).replace(" ", ""))
            variant['name'] = basic.cdata(swatch_names[variant['color_id']])
            variant['add_to_cart_id'] = clean(fields[0], True).replace(" ", "")
            variant['price'] = clean(fields[10], True)
            variants.append(variant)
        return variants

    def custom_clean_string(self, string, spaces=False):
        """Strip control characters, spaces and single quotes from a string.

        Carriage returns, new lines and tabs are always removed.  With
        ``spaces`` false, runs of spaces collapse into one; with ``spaces``
        true every space is removed.  Single quotes are dropped last.
        """
        for control in ("\r", "\n", "\t"):
            string = string.replace(control, "")
        pattern, replacement = (' ', '') if spaces else (' +', ' ')
        string = re.sub(pattern, replacement, string)
        return string.replace("'", "")

    def get_image_names(self, page):
        """Map the first quoted value of each ``new DropDownInfo`` call to
        the third value found between ``'`` and ``')`` in the same call."""
        names = {}
        for chunk in page.split("new DropDownInfo")[1:]:
            label = basic.get_middle_text(chunk, "'", "')")[2]
            key = basic.get_middle_text(chunk, "('", "'")[0]
            names[key] = label
        return names

    def get_recommended(self, hxs):
        """Return the first recommended product as a cdata-wrapped json.

        Only the first div of the right column is used; the result is a
        list with at most one element.
        """
        blocks = hxs.select('//div[@id="right_column_container"]/div')
        recommended = []
        for block in blocks:
            # to do: see how to get full href (different accounts)
            info = {}
            info['link'] = block.select('div/a/@href').extract()[0]
            info['image'] = ("http://www.partylite.biz/imaging/resize"
                             + block.select('div/a/img/@src').extract()[0])
            info['name'] = block.select('div/a/text()').extract()[0]
            recommended.append(basic.cdata(simplejson.dumps(info)))
            # Later blocks are ignored.
            break
        return recommended

    def get_reviews(self, page):
        """Fetch the rating text for the product from the reviews service.

        Downloads the reviews script for the product's review id and
        returns the ``alt`` text of the rating image as a one-element
        list, or an empty list when no rating block is found.
        """
        review_id = self.get_review_id(page)
        url = ("http://partylite.ugc.bazaarvoice.com/8504-en_us/" + review_id
               + "/reviews.djs?format=embeddedhtml").replace(" ", "")
        body = urllib2.urlopen(url).read()
        rating_blocks = basic.get_middle_text(body, '<div class=\\"BVRRRatingNormalImage\\">', '<\/div>')
        if not rating_blocks:
            return []
        return [basic.get_middle_text(rating_blocks[0], 'alt=\\"', '\\"')[0]]

    def get_more_images(self, page):
        """Return absolute urls of the additional product images.

        The relative paths are read from the ``moreImages`` javascript
        array and prefixed with the production or qa host.  Returns an
        empty list when the page has no such script.
        """
        try:
            script = basic.get_middle_text(page, "var moreImages", "var numberOfImages")[0]
        except IndexError:
            print("This product has no images.")
            return []
        entries = basic.get_middle_text(script, "moreImages[", "';")
        # return cdata here if needed to go with absolute links
        host = "http://www.partylite.biz" if self.production else "http://qa.partylite.biz"
        return [host + entry.split("= '")[1] for entry in entries]

    def get_absolute(self, relatives):
        """Creates absolute path for images. [DEPRECATED]

        Please check if there is a need for this function again.
        If needed, dimensions of images got from the client server
        can be changed here.

        NOTE(review): as written, the function prints its argument and
        then terminates the whole process, so the loop below is never
        reached and nothing is returned.  Presumably a tripwire to spot
        remaining callers - confirm before re-enabling.
        """
        new = []
        print relatives
        # Hard exit of the interpreter; everything after this line is dead code.
        os._exit(0)
        for i in range(0, len(relatives)):
            # add width, height here for different dimensions
            # The original note says the url is meant to always be qa, but the
            # literal below uses www - TODO confirm which one is intended.
            new.append("http://www.partylite.biz/imaging/resize?fileName=/productcatalog/production" + relatives[i])
        return new

    def get_review_id(self, page):
        """Return the first ``productId: "..."`` value found in the page."""
        matches = basic.get_middle_text(page, 'productId: "', '"')
        return matches[0]

    def write_subproducts(self, id, list, xml):
        """Write every child product to the xml and return 1.

        ``id`` is the master product id field (a list), ``list`` holds
        one dict per child product and ``xml`` is the xml writer.  Each
        child gets the id ``<master>_<position>`` and is marked in stock;
        every value of the child dict is stored as a one-element list.
        """
        for position, child in enumerate(list):
            item = PartyliteItem()
            item['master_product_id'] = id
            item['product_id'] = [id[0] + "_" + str(position)]
            item['in_stock'] = ["IN_STOCK"]
            for key, value in child.iteritems():
                item[key] = [value]
            xml.create_xml(item)
        return 1

    def parse_can(self, response):
        """Scrape one product response and write the resulting item to xml.

        Callback for every request issued by ``parse``.  Canadian runs
        carry a ``language`` entry in the request meta, which is appended
        to the product id.

        A response reached through a redirect is written as
        ``NOT_AVAILABLE`` and, when its id is known, its status is set to
        ``no_avail``.  Any other response is scraped in full and its
        status becomes ``ran``, or ``error`` when scraping raises.

        Returns the item, with ``image_urls`` set when additional images
        were found.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        item = PartyliteItem()
        hxs = HtmlXPathSelector(response)
        image_urls = []
        if 'redirect_urls' in response.request.meta:
            # The id is taken from the url that was originally requested.
            requested_id = self.get_id(response.request.meta['redirect_urls'][0])[0]
            item['product_id'] = [requested_id]
            self.exc.code_handler(102, response.request.meta['redirect_urls'])
            if 'language' in response.request.meta:
                item['product_id'] = [requested_id + "_can" + "_" + response.meta['language']]
            try:
                index = self.products['product_ids'].index(requested_id)
                item['name'] = [basic.cdata(item['product_id'][0]
                                + self.products['names'][index])]
                self.products['status'][index] = 'no_avail'
            except (KeyError, ValueError):
                # list.index() reports a missing id with ValueError, which the
                # previous KeyError-only handler let escape from the callback.
                print("This %s id is not in list" % (item['product_id'][0]))
            item['in_stock'] = ['NOT_AVAILABLE']
            item['product_id'] = self.remove_spaces(item['product_id'])
            self.xml.create_xml(item)
        else:
            index = self.products['product_ids'].index(self.get_id(response.url)[0])
            try:
                item['product_id'] = self.get_id(response.url)
                item['name'], item['shown_with'] = self.get_basic_info(hxs)
                item['description'] = self.get_description(hxs)
                if 'language' in response.meta:
                    item['product_id'] = [item['product_id'][0] + "_can" + "_" + response.meta['language']]
                response.meta['item'] = item
                page = " ".join(hxs.select('//html').extract())
                image_urls = self.get_more_images(page)
                item['normal_image_url'] = self.get_server_path_field(image_urls)
                item['in_stock'] = self.get_in_stock(hxs)
                color_products = self.create_subproducts(page)
                if color_products:
                    # Was the undefined name ``xml``: every product with color
                    # variants raised NameError and was recorded as an error.
                    self.write_subproducts(item['product_id'], color_products, self.xml)
                else:
                    item['add_to_cart_id'] = self.get_add_to_cart_id(page)
                    item['custom_price'], item['custom_discount'] = self.get_price(hxs)
                self.products['status'][index] = "ran"
            except StandardError:
                basic.print_error()
                self.products['status'][index] = "error"
                self.exc.code_handler(100, response.url)
            else:
                item['product_id'] = self.remove_spaces(item['product_id'])
                self.xml.create_xml(item)
        if image_urls:
            item['image_urls'] = image_urls
        return item

    def spider_closed(self, spider):
        """Handles spider_closed signal from end of scraping.

        Builds the report message, updates the database (when run from
        the interface), writes the xml, optionally exports it, mails the
        report and, for interface runs, stores it under ``logs/<name>``.

        When the database step fails the spider name is used as the file
        name, so the xml and the report are still produced.
        """
        msg = party.get_settings_message(self.d)
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped %d product out of %d\n\n" % (self.counter, self.total)
        # filename for writing xml
        if self.d['database']:
            # Fallback: without it a failing database call left ``filename``
            # unbound and the rest of this handler crashed.
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d['catalog_id'])
                self.database.update_db(self.products)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d['file']
        # The log keeps the plain name; the xml name also carries the language.
        logname = filename
        filename = "{0}_{1}".format(filename, self.d['lang'])
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        from modules.mail import Mail
        from modules.export_to_db import CommonExport
        exp = CommonExport()
        if self.upload:
            try:
                if self.d['lang'] == 'us':
                    exp.xml_to_db(self.name, filename, "55892247-1b92-4ff9-a8a3-33cc976f9341")
                else:
                    exp.xml_to_db(self.name, filename, "9cb6c676-c14f-403b-b94f-b981184e1de0")
                msg += "\n\nExport to database successful"
            except StandardError:
                msg += "\n\nExport to database failed"
        else:
            msg += "\n\nUpload to database not selected"
        mail = Mail()
        try:
            mail.send_mail(msg, "Partylite: {0}".format(filename))
            if self.d['email']:
                mail.send_mail(msg, "Partylite: {0}".format(filename), self.d['email'])
        except Exception:
            # Best effort: a mail failure is only noted in the stored log.
            msg += "\nSending mail failed."
        if self.d['database']:
            path = 'logs/{0}'.format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, logname), 'w') as f:
                f.write(msg)

    def get_id(self, url):
        """Return, as a one-element list, the text that follows the first
        ``&sku=`` in the url (up to the next ``&sku=``, if any)."""
        pieces = url.split("&sku=")
        return [pieces[1]]

    def get_server_path(self, url):
        """Gets server path for image url."""
        url = url.split("partylite.biz")[1]
        return self.images_store + "/full/" + hashlib.sha1(url).hexdigest() + ".jpg"

    def get_server_path_field(self, urls):
        """Getting server path for field of image urls."""
        new = []
        for url in urls:
            url = url.split("partylite.biz")[1]
            new.append(self.images_store + "/full/" + hashlib.sha1(url).hexdigest() + ".jpg")
        return new

    def remove_spaces(self, field):
        """Return a new list with every space removed from each entry."""
        return [entry.replace(' ', '') for entry in field]

    def get_lists_from_excel(self):
        """Fill ``self.products`` and ``self.no_urls`` from the excel sheet.

        The sheet is ``xls/<spider name>/<file>.xls``.  Read failures are
        reported through ``self.exc`` with code 103; post-processing
        (duplicate removal, separating entries without urls, adding the
        status field) runs afterwards either way.
        """
        excel_path = "xls/{0}/{1}.xls".format(self.name, self.d['file'])
        xls = PartyliteExcel(path=excel_path, user=self.users['us'], production=self.production)
        self.products = dict()
        try:
            self.products['urls'] = xls.read_excel_collumn_for_urls(3, 15)
            self.products['product_ids'] = xls.read_excel_collumn_for_ids(1, 15)
            self.products['names'] = xls.read_excel_collumn(2, 15)
        except IOError as e:
            msg = "I/O error {0}: {1}".format(e.errno, e.strerror)
            msg += "\nError occurred for given file: {0}".format(self.d['file'])
            # The handler lives on the instance; the bare name ``exc`` used
            # here before is undefined and turned every report into NameError.
            self.exc.code_handler(103, msg=msg)
        except StandardError:
            msg = "Error reading excel file"
            msg += "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=msg)
        self.products = xls.delete_duplicates_dict(self.products)
        self.products, self.no_urls = xls.separate_no_urls(self.products)
        self.products = xls._add_none_status(self.products)
        self.no_urls = xls._add_none_status(self.no_urls)
コード例 #50
0
def check_alert():
    """Mail every user the currencies that currently satisfy their alerts.

    For each alert the stored price is compared with the current rates
    of its currency: a "cash" alert uses the first stored price and the
    cash rates, a "sign" alert the second stored price and the sign
    rates.  The alert matches when the price is at or above the "in"
    rate, or otherwise at or below the "out" rate; a rate of "-" is
    skipped.  A mail is posted for every user, including users with no
    matching currency.
    """
    # rate_exchange value -> (index into the stored price, in attr, out attr)
    rate_kinds = {
        "cash": (0, "cash_in", "cash_out"),
        "sign": (1, "sign_in", "sign_out"),
    }
    moneydict, position = Money.search_data()
    all_user = [record["email"] for record in Database.find_all(collection="users")]
    for user in all_user:
        print(user)
        message = []
        user_all_alert = Database.find(collection="all_alert", query={"email": user})
        for user_alert in user_all_alert:
            kind = user_alert["rate_exchange"]
            if kind not in rate_kinds:
                continue
            slot, in_attr, out_attr = rate_kinds[kind]
            quote = moneydict[position[user_alert["currency"]]]
            rate_in = getattr(quote, in_attr)
            matched = rate_in != "-" and float(user_alert["price"][slot]) >= float(rate_in)
            if not matched:
                rate_out = getattr(quote, out_attr)
                matched = rate_out != "-" and float(user_alert["price"][slot]) <= float(rate_out)
            if matched and user_alert["currency"] not in message:
                message.append(user_alert["currency"])
        print(user, ":", message)
        # NOTE(review): the Mailgun credential is embedded in the source;
        # consider loading it from configuration instead.
        requests.post(
            "https://api.mailgun.net/v3/sandboxcf0f0204481f4e5db32ca491987d150f.mailgun.org/messages",
            auth=("api", "b7288f39ae1c25d533325c5181e0eada-4a62b8e8-809b3b07"),
            data={"from": "Mailgun Sandbox <*****@*****.**>",
                  "to": user,
                  "subject": "外幣通知",
                  "text": "目前符合調的外幣為:{},請盡快至關網查看!".format(str(message).strip("[]"))})
コード例 #51
0
ファイル: user.py プロジェクト: dar1enyang/Currency-Reminder
 def register_user(name, email, password):
     """Create and store a user unless the email is already registered.

     Returns ``False`` when a user with this email exists, ``True`` after
     the new user has been saved with a hashed password.
     """
     existing = Database.find_one(collection="users", query={"email": email})
     if existing is None:
         User(name, email, User.hash_password(password)).save_to_db()
         return True
     return False
コード例 #52
0
ファイル: burton_spider.py プロジェクト: marjevtic/testMarko
class BurtonSpider(CrawlSpider):
    name = "burton"
    allowed_domains = ["example.com"]
    start_urls = ["http://www.example.com"]
    counter = 0

    def __init__(self, *a, **kw):
        """Prepare the spider: read arguments, load products, set up the xml.

        Products come from the database when the ``database`` argument is
        set, otherwise from the excel sheet.  Products without a url are
        written to the xml right away as not available; the remaining
        urls become the start urls.
        """
        super(BurtonSpider, self).__init__(*a, **kw)
        # Have spider_closed run when scrapy emits the spider_closed signal.
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        terminal = DatabaseTerminal(sys.argv, self.name)
        self.d = terminal.get_arguments()
        self.xml = CommonXml()
        self.exc = ZmagsException(5, "Burton")
        if self.d['database']:
            self.database = Database()
            self.database.connect()
            self.products, self.no_urls = self.database.select_products(self.d['catalog_id'],
                                                                        self.d['product_id'])
            self.database.disconnect()
        else:
            self.get_lists_from_excel()
        self.handle_not_provided()
        burton.add_properties(self.xml)
        # Crawl the loaded product urls.  A hard-coded single test url used to
        # overwrite this list, so the real products were never requested and
        # ``parse`` could not find the response url among the products.
        self.start_urls = self.products['urls']
        self.images_store = "/" + settings['IMAGES_STORE']
        self.total = len(self.start_urls)

    def parse(self, response):
        """Scrape one product response and write the resulting item to xml.

        A redirected response is written as ``NOT_AVAILABLE`` using the id
        and name from the product list (status ``no_avail``); any other
        response is scraped in full (status ``ran``).  When scraping
        raises, the error is reported with code 100, the status becomes
        ``error`` and ``None`` is returned instead of the item.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = BurtonItem()
        page = hxs.extract()
        redirected = 'redirect_urls' in response.request.meta
        # Products are listed under the url that was originally requested.
        if redirected:
            cur_url = response.request.meta['redirect_urls'][0]
        else:
            cur_url = response.url
        index = self.products['urls'].index(cur_url)
        try:
            if redirected:
                item['product_id'] = [self.products['product_ids'][index]]
                item['name'] = [self.products['names'][index]]
                item['in_stock'] = ["NOT_AVAILABLE"]
                self.exc.code_handler(102, response.url)
                self.xml.create_xml(item)
                self.products["status"][index] = "no_avail"
            else:
                item['product_id'], item['name'] = self.get_basic_info(hxs)
                item['description'], item['features'] = self.get_description(hxs)
                item['variants'], thumb_urls, color_names = self.get_variants(page)
                item['all_sizes'] = self.get_all_sizes(page)
                item['color_json'], image_urls = self.get_colors(page, color_names)
                item['price'], item['old_price'] = self.get_prices(hxs)
                item['in_stock'] = ['IN_STOCK']
                item['product_link'] = [basic.cdata(response.url)]
                self.xml.create_xml(item)
                # Added after the xml is written, so it is not part of the xml.
                item['image_urls'] = image_urls + thumb_urls
                self.products["status"][index] = "ran"
        except Exception:
            # Exception instead of a bare except, so that SystemExit and
            # KeyboardInterrupt are no longer swallowed here.
            self.exc.code_handler(100, response.url)
            self.products["status"][index] = "error"
            return None
        return item

    def handle_not_provided(self):
        """Write every product without a url to the xml as not available.

        Ids and names in ``self.no_urls`` are paired by position.
        """
        # Walking by position replaces a list.index() lookup per element,
        # which was quadratic and paired a repeated id with the name of its
        # first occurrence.
        for index, product_id in enumerate(self.no_urls['product_ids']):
            item = BurtonItem()
            item['product_id'] = [product_id]
            item['name'] = [self.no_urls['names'][index]]
            item['in_stock'] = ['NOT_AVAILABLE']
            self.xml.create_xml(item)

    def get_basic_info(self, hxs):
        """Return ``(product_id, name)`` as extracted from the page.

        The name comes from the ``productHeading`` h1, the id from the
        ``productId`` input; both are returned as extracted lists.
        """
        def grab(xpath):
            return hxs.select(xpath).extract()

        heading = grab('//h1[@class="productHeading"]/text()')
        identifier = grab('//input[@name="productId"]/@value')
        return identifier, heading

    def get_server_path(self, url):
        path = self.images_store + "/full/" + hashlib.sha1(url).hexdigest() + ".jpg"
        return path

    def get_prices(self, hxs):
        """Return ``(price, old_price)`` for the product page.

        The price is always converted by ``basic.get_price``; the old
        price only when the page shows one, otherwise it stays the empty
        extraction result.
        """
        current = hxs.select('//div[@class="op"]/text()').extract()
        price = [basic.get_price(current[0])]
        previous = hxs.select('//span[@class="lp"]/text()').extract()
        old_price = [basic.get_price(previous[0])] if previous else previous
        return price, old_price

    def get_description(self, hxs):
        """Return ``(description, features)`` from the product info block.

        The description is the fourth text node of the block; the
        features are the first list of the block, cut to 2000 characters.
        """
        text_nodes = hxs.select('//div[@id="FieldsetProductInfo"]/text()').extract()
        description = text_nodes[3]
        feature_lists = hxs.select('//div[@id="FieldsetProductInfo"]/ul').extract()
        if feature_lists:
            feature_lists = [feature_lists[0][:2000]]
        return [basic.cdata(description)], basic.cdata_field(feature_lists)

    def get_variants(self, page):
        """Read the ``skuSizeColorObj`` entries from the page javascript.

        Returns three parallel lists: the entries as cdata-wrapped json
        (with the swatch url replaced by its server path), the original
        swatch urls and the color descriptions.
        """
        script = basic.get_middle_text(page, 'var skuSizeColorObj = new Array();', '</script>')[0]
        variants, swatch_urls, color_names = [], [], []
        for chunk in script.split('skuSizeColorObj')[1:]:
            assigned = basic.get_middle_text(chunk, '= ', ';')
            entry = simplejson.loads(burton.replace_for_json(assigned[0]))
            swatch_urls.append(entry['swatchURL'])
            color_names.append(entry['ColorDesc'])
            # The json keeps the server path; the original url is returned separately.
            entry['swatchURL'] = self.get_server_path(entry['swatchURL'])
            variants.append(basic.cdata(simplejson.dumps(entry)))
        return variants, swatch_urls, color_names

    def get_all_sizes(self, page):
        """Return the values assigned in the ``distsizeobj`` javascript
        array as a single cdata-wrapped json list."""
        script = basic.get_middle_text(page, 'var distsizeobj=new Array();', 'var indexcolor=0;')[0]
        values = basic.get_middle_text(script, ']="', '";')
        encoded = simplejson.dumps(values)
        return [basic.cdata(encoded)]

    def get_colors(self, page, color_names):
        """Gets color information with images from javascript on the page.

        Returns the colors as cdata-wrapped json (color name plus the
        server path of its image) and the list of original image urls
        that can be used for download later.
        """
        script = basic.get_middle_text(page, 'var imageMap_0 = new Array();', '</script>')[0]
        raw_colors = basic.get_middle_text(script, '] = ', ';')
        image_urls = []
        colors_json = []
        for position, cname in enumerate(color_names):
            color = simplejson.loads(burton.replace_color_json(raw_colors[position]))
            color['cname'] = cname
            del color['reg']
            image_urls.append(color['enh'])
            color['enh'] = self.get_server_path(color['enh'])
            colors_json.append(basic.cdata(simplejson.dumps(color)))
        return colors_json, image_urls

    def spider_closed(self, spider):
        """Handles spider_closed signal from end of scraping.

        Builds the report message, updates the database (when run from
        the interface), writes the xml, optionally exports it, mails the
        report and, for interface runs, stores it under ``logs/<name>``.

        When the database step fails the spider name is used as the file
        name, so the xml and the report are still produced.
        """
        msg = "Ran: {0}".format(datetime.now())
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped %d product out of %d\n\n" % (self.counter, self.total)
        # filename for writing xml
        if self.d['database']:
            # Fallback: without it a failing database call left ``filename``
            # unbound and the rest of this handler crashed.
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d['catalog_id'])
                self.database.update_db(self.products)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d['file']
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        if self.d['upload']:
            exp = CommonExport()
            try:
                exp.xml_to_db(self.name, filename, "4ea95a81-90fb-49e2-837e-acf5ab58f574")
                msg += "\n\nExport to database successful"
            except StandardError:
                msg += "\n\nExport to database failed"
        else:
            msg += "\n\nUpload to database not selected"
        from modules.mail import Mail
        mail = Mail()
        try:
            mail.send_mail(msg, "Burton: {0}".format(filename))
            if self.d['email']:
                mail.send_mail(msg, "Burton: {0}".format(filename), self.d['email'])
        except Exception:
            # Best effort: a mail failure is only noted in the stored log.
            msg += "\nSending mail failed."
        if self.d['database']:
            path = "logs/{0}".format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), 'w') as f:
                f.write(msg)

    def get_lists_from_excel(self):
        """Fill ``self.products`` and ``self.no_urls`` from the excel sheet.

        The sheet path is resolved from the spider name and the ``file``
        argument.  Read failures are reported through ``self.exc`` with
        code 103; post-processing (duplicate removal, separating entries
        without urls, adding the status field) runs afterwards either way.
        """
        workbook = DictExcel(basic.get_excel_path(self.name, self.d['file']))
        self.products = {}
        try:
            self.products["urls"] = workbook.read_excel_collumn_for_urls(3, 15)
            self.products["product_ids"] = workbook.read_excel_collumn_for_ids(1, 15)
            self.products["names"] = workbook.read_excel_collumn(2, 15)
        except IOError as io_error:
            report = "I/O error {0}: {1}".format(io_error.errno, io_error.strerror)
            report += "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        except StandardError:
            report = "Error reading excel file"
            report += "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        # Post-processing is delegated entirely to the workbook helper.
        self.products = workbook.delete_duplicates_dict(self.products)
        self.products, self.no_urls = workbook.separate_no_urls(self.products)
        self.products = workbook._add_none_status(self.products)
        self.no_urls = workbook._add_none_status(self.no_urls)
コード例 #53
0
ファイル: lydias_spider.py プロジェクト: marjevtic/testMarko
class LydiasSpider(CrawlSpider):
    name = "lydias"
    allowed_domains = ["example.com"]
    start_urls = ["http://www.example.com"]
    counter = 0

    def __init__(self, *a, **kw):
        """Set up the spider: read run arguments, load the product list, prepare the xml writer.

        Products come from the database when self.d['database'] is truthy,
        otherwise from the excel file handled by get_lists_from_excel().
        """
        super(LydiasSpider, self).__init__(*a, **kw)
        # call spider_closed() when the spider_closed signal is dispatched
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        terminal = DatabaseTerminal(sys.argv, self.name)
        # run arguments; keys read in this class: 'database', 'catalog_id', 'product_id', 'file'
        self.d = terminal.get_arguments()
        self.xml = VariantsXml()
        self.exc = ZmagsException(5)
        if self.d['database']:
            self.database = Database()
            self.database.connect()
            self.products, self.no_urls = self.database.select_products(self.d['catalog_id'],
                                                                        self.d['product_id'])
            self.database.disconnect()
        else:
            self.get_lists_from_excel()
        # fix for bug with links they provide
        self.products['urls'] = basic.cut_string_field(self.products['urls'], "&cat=")
        # write NOT_AVAILABLE entries for the products that came without a url
        self.handle_not_provided()
        # crawl exactly the provided product urls
        self.start_urls = self.products['urls']
        self.images_store = "/" + settings['IMAGES_STORE']
        lydias.add_properties(self.xml)
        # number of urls to scrape; used for progress output and the closing report
        self.total = len(self.products['urls'])

    def parse(self, response):
        """Scrape one product page and write it (plus its color children) to the xml.

        Returns the LydiasItem built for the page. self.products['status'] is
        set to "ran", "not_avail" or "error" for the product's index.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = LydiasItem()
        # after a redirect, use the first url of the chain so it can be looked
        # up in self.products['urls']
        if 'redirect_urls' in response.request.meta:
            cur_url = response.request.meta['redirect_urls'][0]
        else:
            cur_url = response.url
        # NOTE(review): .index() raises ValueError when cur_url is not in the
        # list, and this lookup sits outside the try below
        index = self.products['urls'].index(cur_url)
        id = self.products['product_ids'][index]
        try:
            # any text in the "searchfor" div is treated as "product not available"
            available = hxs.select('//div[@id="searchfor"]/text()').extract()
            if not available:
                item['product_id'] = [id]
                item['name'], item['price'], item['old_price'], item['description'] = self.get_basic_info(hxs)
                item['rating'], item['custom_rating'] = self.get_rating(hxs)
                chart = self.absolute_path(self.get_size_image(hxs))
                item['sizes_chart_image_url'] = self.get_server_path(chart)
                color_urls, color_names, product_image, color_codes = self.get_image_swatches(hxs)
                color_urls = self.absolute_path(color_urls)
                item['color_image_url'] = self.make_colors_json(color_urls, color_names, color_codes)
                item['in_stock'] = ["IN_STOCK"]
                item['embroidery'] = self.get_embroidery(hxs)
                default_images = self.absolute_path(self.get_extra_images(hxs))
                item['default_image_url'] = self.get_server_path(default_images)
                # the master product is written before its color children
                self.xml.create_xml(item)
                product_image = self.absolute_path(product_image)
                self.create_subproducts(id, color_names, product_image, color_codes, hxs)
                # all absolute image urls found on the page, as one list
                item['image_urls'] = product_image + color_urls + chart + default_images
                self.products['status'][index] = "ran"
            else:
                self.exc.code_handler(102, response.url)
                item['product_id'] = [id]
                item['in_stock'] = ["NOT_AVAILABLE"]
                self.products['status'][index] = "not_avail"
                self.xml.create_xml(item)
        # NOTE(review): bare except also catches SystemExit/KeyboardInterrupt
        except:
            self.products['status'][index] = "error"
            self.exc.code_handler(100, response.url)
        return item

     # function for checking if product has embroidery or not
    def get_embroidery(self, hxs):
        """Return ["True"] when the page source disables the 'logocolor' element, else ["False"]."""
        marker = "document.getElementById('logocolor').disabled = true;"
        html = hxs.select('//html').extract()[0]
        return ["True"] if marker in html else ["False"]

    # function for creating json with all information for colors
    def make_colors_json(self, color_urls, color_names, color_codes):
        """Return one CDATA-wrapped JSON string per color swatch.

        Each JSON object has the keys color_url (server path of the swatch
        image), color_name and color_short. The three inputs are read by the
        same position; color_urls decides how many entries are produced.
        """
        swatch = {}
        encoded = []
        for position, swatch_url in enumerate(color_urls):
            swatch['color_url'] = self.get_server_path_single(swatch_url)
            swatch['color_name'] = color_names[position]
            swatch['color_short'] = color_codes[position]
            encoded.append(basic.cdata(simplejson.dumps(swatch)))
        return encoded

    # function for getting image server path
    def get_server_path_single(self, url):
        """Return the image-store path for *url*: <images_store>/full/<sha1 of url>.jpg."""
        store_dir = self.images_store + "/full/"
        return store_dir + hashlib.sha1(url).hexdigest() + ".jpg"

    # function for getting image path for field of images
    def get_server_path(self, urls):
        """Return the image-store path (<images_store>/full/<sha1 of url>.jpg) for every url in *urls*."""
        return [
            self.images_store + "/full/" + hashlib.sha1(link).hexdigest() + ".jpg"
            for link in urls
        ]

    #function for getting basic information for product
    def get_basic_info(self, hxs):
        """Return (name, price, old_price, description) for the product page.

        All four values are lists. Prices are reduced to digits and dots; the
        price falls back to the PriceDisplay span when the main price div is
        empty. old_price stays an empty list when the page has none.
        """
        def numeric(raw):
            # keep only digits and the decimal point
            return re.sub('[^0-9.]', '', raw)

        title = hxs.select('//div[@id="proddetail"]/h1/text()').extract()
        current = hxs.select('//div[@id="proddetail"]/div[@class="yourprice bigprice"]/text()').extract()
        details = hxs.select('//div[@id="details"]').extract()[0]
        blurb = basic.clean_string(basic.cdata(details))
        previous = hxs.select('//span[@class="yourprice_product"]/text()').extract()
        if not current:
            current = hxs.select('//span[@id="PriceDisplay"]/text()').extract()
        if previous:
            previous = [numeric(previous[0])]
        return title, [numeric(current[0])], previous, [blurb]

    # function for getting rating, both number and sentence (e.g. Rating 5 out of 6 votes)
    def get_rating(self, hxs):
        """Return (rating, sentence) taken from the "Customerssay" block.

        *sentence* is the extracted text list; *rating* is what
        basic.get_middle_text finds between "Rating:" and "out" once spaces
        are removed, or an empty list when the page has no such text.
        """
        sentence = hxs.select('//div[@id="Customerssay"]/p[2]/text()').extract()
        if not sentence:
            return [], sentence
        compact = sentence[0].replace(" ", "")
        return basic.get_middle_text(compact, "Rating:", "out"), sentence

    #function for getting reviews, returning rating and field of json reviews
    # or empty fields if there's no reviews
    def get_reviews(self, hxs):
        """Return the review JSON list built from the first "prodReview" block, or [] when there is none."""
        blocks = hxs.select('//div[@class="prodReview"]')
        if not blocks:
            return []
        first = blocks[0]
        title = first.select('p[@class="review_title"]/text()').extract()
        text = first.select('p[@class="review_text"]/text()').extract()
        author = first.select('p[@class="review_author"]/text()').extract()
        location = first.select('p[@class="review_location"]/text()').extract()
        return self.make_reviews_json(title, text, author, location)

    # function for making json for reviews
    # currently not in use. cause there are no reviews in DPW design
    def make_reviews_json(self, title, text, author, location):
        """Return one CDATA-wrapped JSON string per review.

        The four arguments are parallel lists; *title* decides how many
        reviews are produced. Each JSON object has the keys title, text,
        author and location.

        The leftover debugging code (length prints followed by os._exit(0))
        that terminated the process before any JSON was built has been
        removed, and the objects are serialized with simplejson so quotes and
        other special characters in the review text are escaped.
        """
        jsons = []
        for i in range(len(title)):
            review = {
                "title": title[i],
                "text": text[i],
                "author": author[i],
                "location": location[i],
            }
            jsons.append(basic.cdata(simplejson.dumps(review)))
        return jsons

    #function for getting size chart image
    def get_size_image(self, hxs):
        """Return the list of image src values found in the "TabbedPanelsContent cells" div."""
        return hxs.select('//div[@class="TabbedPanelsContent cells"]/img/@src').extract()

    #function for getting image swatches, returning fields (image_urls, image name, product color image)
    def get_image_swatches(self, hxs):
        """Return (swatch image urls, color names, product image urls, color codes).

        One entry per "lolite" div, all four lists in page order. The color
        code is the second comma-separated argument of the link's onclick
        attribute with the single quotes removed.
        """
        def first(node, query):
            return node.select(query).extract()[0]

        swatch_urls, labels, photos, codes = [], [], [], []
        for swatch in hxs.select('//div[@class="lolite"]'):
            swatch_urls.append(first(swatch, 'a/img/@src'))
            labels.append(first(swatch, 'a/img/@alt'))
            # if zoom image needed, this is the place to get it
            photos.append(first(swatch, 'a/@rev'))
            handler = first(swatch, 'a/@onclick')
            codes.append(handler.split(",")[1].replace("'", ""))
        return swatch_urls, labels, photos, codes

    #function for getting additional images, returns field of images or empty field if there is no
    def get_extra_images(self, hxs):
        """Return the additional image urls listed in the "AddImg" script, or [] when there is none.

        The urls are the comma-separated content of the first double-quoted
        string that basic.get_middle_text finds in the script text.
        """
        scripts = hxs.select('//div[@id="AddImg"]/script/text()').extract()
        if not scripts:
            return []
        quoted = basic.get_middle_text(scripts[0], '"', '"')
        return quoted[0].split(",")

    #function for getting product id from the page
    def get_product_id(self, hxs):
        """Return the product id found after 'productid","' in the first script of the "wrap" div."""
        scripts = hxs.select('//div[@id="wrap"]/script/text()').extract()
        matches = basic.get_middle_text(scripts[0], 'productid","', '"')
        return matches[0]

    # function for getting sizes from another url, retunrning field of jsons for sizes
    # one id from the page is 115NB, if needed here to hardcode for testing
    # currently not in use
    def get_sizes(self, id, hxs):
        showmode = hxs.select('//input[@name="showmode"]/@value').extract()[0]
        itemmode = hxs.select('//input[@name="itemmode"]/@value').extract()[0]
        salemode = hxs.select('//input[@name="salemode"]/@value').extract()[0]
        url = "http://www.lydiasuniforms.com/ajaxed/product-showoptions.asp?sku=%s&opt1=AV&opt2=-1&type2=l1type" % (id)
        url += "&type3=&showmode=%s&itemmode=%s&salemode=%s&rnum=429" % (showmode, itemmode, salemode)
        jsons = []
        print "reading page..."
        page = urllib2.urlopen(url).read()
        print "page read"
        page = page.replace("'", "")
        page = page.replace("[", ",")
        page = page.replace(",,", "")
        temp = page.split("]")
        for i in range(0, len(temp) - 2):
            tmp = temp[i].split(",")
            json = '{ "size_short" : " %s ", "size_full" : "%s", "some_number" :\
                    "%s", "some_id" : "%s" }' % (tmp[0], tmp[1], tmp[2], tmp[3])
            json = basic.cdata(json)
            jsons.append(json)
        return jsons

    # function that handles creating subproducts, can be implemented for the usual way product for every combination
    # of size and color if needed
    def create_subproducts(self, id, color_names, product_image, color_codes, hxs):
        item = LydiasItem()
        # if no colors for specific product do this part and call to creating size children with empty string instead
        # of actual color name
        if len(color_names) == 0:
            item['master_product_id'] = [id]
            item['product_id'] = [id + "_" + "0"]
            item['color'] = ["NO_COLOR"]
            item['custom_size'] = self.create_sizes_subproducts(id, id + "_" + "0", "", hxs)
            self.xml.create_xml(item)

        # for handling cases when there are color options for specific product, create child for every color, and call
        # for creating size children for every provided color
        else:
            for i in range(0, len(color_names)):
                print "name :" + color_names[i] + "  code:" + color_codes[i]
                item['master_product_id'] = [id]
                item['product_id'] = [id + "_" + str(i)]
                item['color'] = [color_names[i]]
                item['color_short'] = [color_codes[i]]
                item['normal_image_url'] = self.get_server_path([product_image[i]])
                item['in_stock'] = ["IN_STOCK"]
                item['custom_size'] = self.create_sizes_subproducts(id, id + "_" + str(i), color_codes[i], hxs)
                self.xml.create_xml(item)
                item.clear()
        return 0

    # function for creating child products for sizes
    # little messy with all the commented lines but those lines can be used if needed to go back to old way with
    # child products instead of json
    def create_sizes_subproducts(self, main_id, id, color_code, hxs):
        """Return the sizes of a product (for one color) as CDATA-wrapped JSON strings.

        With a color code the sizes are downloaded from the
        product-showoptions endpoint; without one they are read from the
        "not_size" divs of the page. Every entry carries the url from
        get_size_price(). *id* is only referenced by the commented-out
        child-product lines.
        """
        print color_code
        jsons = []
        # if block for cases when color is provided
        if color_code != "":
            showmode = hxs.select('//input[@name="showmode"]/@value').extract()[0]
            itemmode = hxs.select('//input[@name="itemmode"]/@value').extract()[0]
            salemode = hxs.select('//input[@name="salemode"]/@value').extract()[0]
            url = "http://www.lydiasuniforms.com/ajaxed/product-showoptions.asp?sku=%s&opt1=%s&opt2=-1&type2=l1type&" \
                "type3=&showmode=%s&itemmode=%s&salemode=%s&rnum=193" % (main_id, color_code, showmode, itemmode, salemode)
            page = urllib2.urlopen(url).read()
            # strip quotes and turn the bracketed rows into comma-separated chunks
            page = page.replace("'", "")
            page = page.replace("[", ",")
            page = page.replace(",,", "")
            temp = page.split("]")
            # the last two chunks of the split are not used
            for i in range(0, len(temp) - 2):
                tmp = temp[i].split(",")
                item = {}
#                item['master_product_id'] = [id]
                item['size_short'] = tmp[0]
                item['price_url'] = self.get_size_price(str(main_id), str(color_code), tmp[0])
                item['size'] = tmp[1]
#                item['product_id'] = [id + "_" + str(i)]
#                item['in_stock'] = ["IN_STOCK"]
#                xml.create_xml(item)
                jsons.append(basic.cdata(simplejson.dumps(item)))
            return jsons

        # when the color is not provided different block of code cause it's done differently on the page
        else:
            temp = hxs.select('//div[@class="not_size"]/text()').extract()
            for i in range(0, len(temp)):
                item = {}
#                item['master_product_id'] = [id]
#                item['product_id'] = [id + "_" + str(i)]
                item['size_short'] = temp[i]
                item['price_url'] = self.get_size_price(str(main_id), "", temp[i])
#                item['in_stock'] = ["IN_STOCK"]
#                xml.create_xml(item)
                jsons.append(basic.cdata(simplejson.dumps(item)))
            return jsons

#        return 0

    # function for getting price for combination of every size and color, can return url where the price is, or can
    # parse that url to get that actual price but will drastically increase scraping time
    def get_size_price(self, id, color, size):
        """Return the product-showprice url for one id / color / size combination.

        The sku is "<id> <color> <size>" (or "<id> <size>" when *color* is
        the empty string); every space in the url is encoded as %20.
        """
        if color == "":
            plain = "http://www.lydiasuniforms.com/ajaxed/product-showprice.asp?sku=%s %s&qty=1&itemmode=" \
                    "0&showmode=1&rnum=259" % (id, size)
            return plain.replace(" ", "%20")
        colored = "http://www.lydiasuniforms.com/ajaxed/product-showprice.asp?sku=%s %s %s&qty=1&itemmode=" \
                  "0&showmode=1&rnum=388" % (str(id), str(color), size)
        return colored.replace(" ", "%20")

    # just adding part for getting absolute paths for relative paths from page
    def absolute_path(self, urls):
        """Return *urls* with the site root "http://www.lydiasuniforms.com" put in front of each entry."""
        return ["http://www.lydiasuniforms.com" + relative for relative in urls]

    # function used for gettin embroidery information from clients page, was used only once to get it
    # cause embroidery is the same for all the products
    def get_emb(self, hxs):
        """Collect the embroidery options of the page and write them to the "emb" xml.

        Three <select> elements are read: thread colors ("threadcolor"),
        lettering styles ("lettering") and logos ("logoname"). Every option
        after the first becomes a CDATA-wrapped JSON object with the keys
        type, name and url (server path of the option's image). The three
        lists are stored on one LydiasItem under 'color', 'lettering' and
        'log'.

        Returns the source image urls of all options, in the order thread
        colors, letterings, logos.

        Changes from the previous version: the xml writer is reached through
        self.xml like in every other method of this class (a bare ``xml`` name
        was used before), the statements after ``return`` that could never run
        were dropped, and the three copy-pasted loops share one helper.
        """
        urls = []

        def collect(query, kind, url_prefix, extension, underscore_spaces):
            # Build the JSON list for one <select>; also records each image url in `urls`.
            encoded = []
            # the first option is skipped, as before
            # NOTE(review): presumably a placeholder entry - confirm
            for option in hxs.select(query).extract()[1:]:
                slug = option.lower()
                if underscore_spaces:
                    slug = slug.replace(' ', '_')
                url = url_prefix + slug + extension
                entry = {
                    'type': kind,
                    'name': option,
                    'url': self.get_server_path_single(url),
                }
                urls.append(url)
                encoded.append(basic.cdata(simplejson.dumps(entry)))
            return encoded

        colors = collect('//select[@id="threadcolor"]/option/@value', "lettering colors",
                         "http://www.lydiasuniforms.com/images/lydias/threadcolor_", ".gif", True)
        letterings = collect('//select[@id="lettering"]/option/@value', "lettering",
                             "http://www.lydiasuniforms.com/images/lydias/lettering_", ".gif", True)
        log = collect('//select[@id="logoname"]/option/@value', "logo",
                      "http://www.lydiasuniforms.com/images/logos/", ".jpg", False)

        item = LydiasItem()
        item['color'] = colors
        item['lettering'] = letterings
        item['log'] = log
        self.xml.create_xml(item)
        # NOTE(review): other callers pass (spider name, filename) to
        # write_xml; the single "emb" argument is kept as it was - confirm
        self.xml.write_xml("emb")
        return urls

    def handle_not_provided(self):
        """Write a NOT_AVAILABLE xml entry for every product that was supplied without a url.

        Ids and names are paired by position in self.no_urls. The previous
        version looked each id up again with list.index(), which is quadratic
        and returns the first match, so a repeated id got the wrong name.
        """
        item = LydiasItem()
        for index, product_id in enumerate(self.no_urls['product_ids']):
            item['product_id'] = [product_id]
            item['name'] = [self.no_urls['names'][index]]
            item['in_stock'] = ['NOT_AVAILABLE']
            self.xml.create_xml(item)

    def spider_closed(self, spider):
        """Handles spider_closed signal from end of scraping.

        Builds the run report, updates the database (interface runs), writes
        the xml, mails the report and, for interface runs, stores the report
        under logs/<spider name>/<filename>.

        A failed database step no longer leaves ``filename`` undefined: the
        spider name is used instead, so the xml and the report are still
        written. Only Exception subclasses are swallowed by the best-effort
        steps, so SystemExit / KeyboardInterrupt propagate.
        """
        msg = ""
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped %d product out of %d\n\n" % (self.counter, self.total)
        # filename for writing xml
        if self.d['database']:
            # fallback used when the catalog name cannot be read below
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d['catalog_id'])
                self.database.update_db(self.products)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d['file']
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        #if self.d['upload']:
            #exp = CommonExport()
            #try:
                #exp.xml_to_db(self.name, filename, "4b0d6b52-7b05-4e54-9d87-dfe77ac270c9")
                #msg += "\n\nExport to database successful"
            #except StandardError:
                #msg += "\n\nExport to database failed"
        #else:
            #msg += "\n\nUpload to database not selected"
        ## part for exporting to database here
        from modules.mail import Mail
        mail = Mail()
        try:
            mail.send_mail(msg, "Lydias: {0}".format(filename))
        except Exception:
            # mailing is best effort; the failure is noted in the stored report
            msg += "\nSending mail failed."
        if self.d['database']:
            path = "logs/{0}".format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), 'w') as f:
                f.write(msg)

    def get_lists_from_excel(self):
        """Fill self.products and self.no_urls from the excel file in self.d['file'].

        A failed read is reported via self.exc.code_handler(103, ...). The
        de-duplication, url split and status steps run only after a
        successful read.
        """
        workbook = DictExcel(basic.get_excel_path(self.name, self.d['file']))
        self.products = {}
        try:
            self.products['urls'] = workbook.read_excel_collumn_for_urls(3, 15)
            self.products['product_ids'] = workbook.read_excel_collumn_for_ids(1, 15)
            self.products['names'] = workbook.read_excel_collumn(2, 15)
        except IOError as err:
            report = "I/O error {0}: {1}".format(err.errno, err.strerror) \
                + "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        except StandardError:
            report = "Error reading excel file" \
                + "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        else:
            # de-duplicate, split off the rows without a url, then give both sets a status
            self.products = workbook.delete_duplicates_dict(self.products)
            self.products, self.no_urls = workbook.separate_no_urls(self.products)
            self.products = workbook._add_none_status(self.products)
            self.no_urls = workbook._add_none_status(self.no_urls)
コード例 #54
0
ファイル: chome_spider.py プロジェクト: marjevtic/testMarko
class ChomeSpider(CrawlSpider):
    name = "chome"
    allowed_domains = ["zmags.com"]
    start_urls = ["http://www.zmags.com/"]
    counter = 0

    def __init__(self, *a, **kw):
        """Set up the spider: read run arguments, load the id list, prepare the xml writer.

        Products come from the database when self.d['database'] is truthy,
        otherwise from the excel file handled by get_lists_from_excel().
        """
        super(ChomeSpider, self).__init__(*a, **kw)
        # call spider_closed() when the spider_closed signal is dispatched
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        terminal = DatabaseTerminal(sys.argv, self.name)
        # run arguments; keys read in this class include 'database',
        # 'catalog_id', 'product_id', 'file', 'download' and 'email'
        self.d = terminal.get_arguments()
        self.xml = CommonXml()
        self.exc = ZmagsException(5)
        if self.d['database']:
            self.database = Database()
            self.database.connect()
            self.products, self.no_urls = self.database.select_products(self.d['catalog_id'],
                                                                        self.d['product_id'])
            self.database.disconnect()
        else:
            self.get_lists_from_excel()
        self.add_properties(self.xml)
        self.images_store = "/" + settings['IMAGES_STORE']
        # this spider works from the ids in self.no_urls (matched against the
        # client feed in parse_whole_xml), so the total counts those ids
        self.total = len(self.no_urls['product_ids'])

    def parse(self, response):
        """Process the client feed and return an item holding every image url found in it.

        The response body itself is not read; the data comes from
        parse_whole_xml().
        """
        self.counter += 1
        # NOTE(review): hxs is not used anywhere below
        hxs = HtmlXPathSelector(response)
        item = ChomeItem()
        print "IDs in excel feed: {0}".format(self.total)
        item['image_urls'] = self.parse_whole_xml()
        return item

    def parse_whole_xml(self):
        """Read the client feed, write an xml entry for every requested id and return the image urls.

        The feed is downloaded to xml/<spider name>/ when self.d['download']
        is truthy; otherwise an existing client_feed.xml is required (the
        process exits with code 2 when it is missing). Ids found in the feed
        get status 'ran' in self.no_urls, all others 'not_found'.
        self.number counts the ids that were found.
        """
        xml_dir = "xml/{0}".format(self.name)
        file_url = "https://svc.celebratinghome.com/ZMags.svc/ProductInfo1"
        downloader = Downloader()
        if self.d['download']:
            downloader.get_file(xml_dir, file_url, "client_feed")
        else:
            if not os.path.exists('xml/{0}/client_feed.xml'.format(self.name)):
                basic.warning("Feed file doesn't exist please de-select no download option")
                os._exit(2)
        self.number = 0
        # NOTE(review): one item object is reused for every product and never
        # cleared, so a field that is absent for one product appears to keep
        # the previous product's value - confirm whether that is intended
        xml_item = ChomeItem()
        urls_all = []
        for event, elem in iterparse('xml/{0}/client_feed.xml'.format(self.name)):
            # each "properties" element holds the fields of one product
            if elem.tag == "{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}properties":
                for r in elem:
                    # namespace prefix of the individual field tags
                    p = "{http://schemas.microsoft.com/ado/2007/08/dataservices}"
                    # only products whose Id is in the requested id list are processed
                    if r.tag == p + "Id" and r.text in self.no_urls['product_ids']:
                        index = self.no_urls['product_ids'].index(r.text)
                        self.no_urls['status'][index] = 'ran'
                        self.number += 1
                        urls = []
                        flag = 0
                        # second pass over the same element: copy the fields into the item
                        for x in elem:
                            if x.tag == p + "Id":
                                xml_item['product_id'] = [x.text]
                            elif x.tag == p + "EngLongDesc" and x.text is not None:
                                xml_item['description_english'] = [self.escape(basic.cdata(x.text))]
                            elif x.tag == p + "RetailPrice":
                                # the last two characters of the price text are dropped
                                xml_item['custom_price'] = [x.text[:-2]]
                            elif x.tag == p + "SpnLongDesc" and x.text is not None:
                                xml_item['description_spanish'] = [self.escape(basic.cdata(x.text))]
                            elif x.tag == p + "PartNumber":
                                xml_item['add_to_cart_id'] = [x.text]
                            elif x.tag == p + "MaxQty":
                                xml_item['max_qty'] = [x.text]
                            elif x.tag == p + "TimeType":
                                xml_item['time_type'] = [x.text]
                            elif x.tag == p + "SpnName" and x.text is not None:
                                xml_item['name_spanish'] = [x.text]
                            elif x.tag == p + "EngName":
                                xml_item['name_english'] = [x.text]
                            elif x.tag == p + "ImagePath_Large" and x.text is not None:
                                urls.append(self.get_absolute(x.text))
                                xml_item['normal_image_url'] = [self.get_server_path(self.get_absolute(x.text))]
                            elif x.tag == p + "IsActive":
                                # NOTE(review): x.text is text (or None), so comparing it
                                # with the integer 0 looks like it can never be true and
                                # every product ends up IN_STOCK - confirm the intended check
                                if x.text == 0:
                                    xml_item['in_stock'] = ["NOT_IN_STOCK"]
                                else:
                                    xml_item['in_stock'] = ['IN_STOCK']
                            else:
                                # Alternate1..3 images are appended to the list started
                                # by ImagePath_Large, which therefore has to exist already
                                for i in range(1, 4):
                                    tag = p + "Alternate%sImagePath_Large" % (str(i))
                                    if x.tag == tag and x.text is not None:
                                        urls.append(self.get_absolute(x.text))
                                        xml_item['normal_image_url'].append(self.get_server_path(self.get_absolute(x.text)))
                                        # change image paths for normal_image_url and return urls
                        self.xml.create_xml(xml_item)
                        urls_all += urls
        # every requested id that did not show up in the feed is marked as not found
        for i in range(0, len(self.no_urls['status'])):
            if self.no_urls['status'][i] != 'ran':
                self.no_urls['status'][i] = 'not_found'
        return urls_all

    def get_server_path(self, url):
        """Return the image-store path for *url*: <images_store>/full/<sha1 of url>.jpg."""
        store_dir = self.images_store + "/full/"
        return store_dir + hashlib.sha1(url).hexdigest() + ".jpg"

    def get_absolute(self, url):
        """Return *url* with the site root "http://www.celebratinghome.com/" put in front of it."""
        site_root = "http://www.celebratinghome.com/"
        return site_root + url

    def escape(self, string):
        temp = HTMLParser.HTMLParser().unescape(string)
        return HTMLParser.HTMLParser().unescape(temp)

    def spider_closed(self, spider):
        """Handles spider_closed signal from end of scraping.

        Builds the run report, updates the database (interface runs), writes
        the xml, mails the report (also to self.d['email'] when given) and,
        for interface runs, stores the report under
        logs/<spider name>/<filename>.

        Two failure paths no longer abort the report: self.number is treated
        as 0 when parse_whole_xml() never ran, and the spider name is used as
        filename when the database step fails before the catalog name is
        read. Only Exception subclasses are swallowed by the best-effort
        steps, so SystemExit / KeyboardInterrupt propagate.
        """
        # self.number is only assigned inside parse_whole_xml()
        found = getattr(self, 'number', 0)
        msg = "Ran: {0}\n".format(datetime.now())
        if self.total - found:
            msg += "{0} id(s) from id list weren't found in feed".format(self.total - found)
            basic.warning(msg)
        else:
            msg += "All ids found in feed."
            basic.green(msg)
        # filename for writing xml
        if self.d['database']:
            # fallback used when the catalog name cannot be read below
            filename = self.name
            try:
                self.database.connect()
                filename = self.database.get_name(self.d['catalog_id'])
                self.database.update_db(self.no_urls)
                self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d['file']
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        #if self.d['upload']:
            #exp = CommonExport()
            #try:
                #exp.xml_to_db(self.name, self.d['file'], "40b029c9-dff7-4bc1-b8bc-ef062960b24d")
                #msg += "\n\nExport to database successful"
            #except StandardError:
                #msg += "\n\nExport to database failed"
        #else:
            #msg += "\n\nUpload to database not selected"
        from modules.mail import Mail
        mail = Mail()
        try:
            mail.send_mail(msg, "CelebratingHome: {0}".format(filename))
            if self.d['email']:
                mail.send_mail(msg, "CelebratingHome: {0}".format(filename), self.d['email'])
        except Exception:
            # mailing is best effort; the failure is noted in the stored report
            msg += "\nSending mail failed."
        if self.d['database']:
            path = "logs/{0}".format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), 'w') as f:
                f.write(msg)

    def get_lists_from_excel(self):
        """Fill self.products and self.no_urls from the excel file in self.d['file'].

        Product ids, names and urls are read through DictExcel. A failed read
        is reported via self.exc.code_handler(103, ...); the clean-up steps at
        the end run whether or not the read succeeded.
        """
        workbook = DictExcel(basic.get_excel_path(self.name, self.d['file']))
        self.products = {}
        try:
            self.products['product_ids'] = workbook.read_excel_collumn_for_ids(1, 15)
            self.products['names'] = workbook.read_excel_collumn(2, 15)
            self.products['urls'] = workbook.read_excel_collumn_for_urls(3, 15)
        except IOError as err:
            report = "I/O error {0}: {1}".format(err.errno, err.strerror) \
                + "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        except StandardError:
            report = "Error reading excel file" \
                + "\nError occurred for given file: {0}".format(self.d['file'])
            self.exc.code_handler(103, msg=report)
        # de-duplicate, split off the rows without a url, then give both sets a status
        self.products = workbook.delete_duplicates_dict(self.products)
        self.products, self.no_urls = workbook.separate_no_urls(self.products)
        self.products = workbook._add_none_status(self.products)
        self.no_urls = workbook._add_none_status(self.no_urls)

    def add_properties(self, xml):
        """Register this spider's custom properties (all of type "text") on *xml*."""
        text_properties = (
            ("description_english", "Description English"),
            ("description_spanish", "Description Spanish"),
            ("add_to_cart_id", "Add To Cart ID"),
            ("max_qty", "Max Quantity"),
            ("time_type", "Time Type"),
            ("name_english", "Name English"),
            ("name_spanish", "Name Spanish"),
            ("in_stock", "In Stock"),
            ("custom_price", "Custom Price"),
        )
        for key, label in text_properties:
            xml.add_property(key, label, "text")
コード例 #55
0
from modules.database import Database
from modules.user import User

# Class-level initialisation call; it runs before the user update below.
Database.initialize()
# NOTE(review): both addresses are masked ("*****@*****.**") in this copy, so
# the call passes two identical strings; the argument order (current address,
# new address) is assumed - confirm against User.update_user_email.
User.update_user_email("*****@*****.**", "*****@*****.**")
コード例 #56
0
ファイル: kenneth_spider.py プロジェクト: marjevtic/testMarko
class KennethSpider(CrawlSpider):
    """Product spider named "kenneth".

    The product urls to visit are loaded in __init__ and replace the
    placeholder start_urls declared below.
    """
    name = "kenneth"
    allowed_domains = ["example.com"]
    # Placeholder; __init__ overwrites this with self.products['urls'].
    start_urls = ["http://www.example.com"]
    # Number of responses handled by parse() so far.
    counter = 0

    def __init__(self, *a, **kw):
        """Connect the close signal, read the run arguments and load products.

        Products come from the database when the 'database' argument is set,
        otherwise from the excel file.  The product urls then become
        start_urls and products without urls are written to the xml straight
        away.
        """
        super(KennethSpider, self).__init__(*a, **kw)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        argument_source = DatabaseTerminal(sys.argv, self.name)
        self.images_store = "/" + settings['IMAGES_STORE'] + "/"
        self.d = argument_source.get_arguments()
        self.xml = VariantsXml()
        self.exc = ZmagsException(5)
        print(self.d)
        if not self.d['database']:
            self.get_lists_from_excel()
        else:
            self.database = Database()
            self.database.connect()
            selection = self.database.select_products(
                self.d['catalog_id'], self.d['product_id'])
            self.products, self.no_urls = selection
            self.database.disconnect()
        self.add_properties(self.xml)
        self.no_url_products(self.no_urls)
        self.start_urls = self.products['urls']
        self.total = len(self.start_urls)

    def parse(self, response):
        """Scrape one product page, write it to the xml and return the item.

        Pages without the "noResultsContent" div are scraped in full (basic
        info, prices, colour swatches, recommendations, colours and sizes).
        Pages with that div are written as NOT_AVAILABLE and reported with
        code 102.  Any error while scraping is reported with code 100 and the
        product status is set to "error" when the url can be found in
        self.products['urls'].

        Returns the KennethItem, which may be only partly filled after an
        error.
        """
        self.counter += 1
        basic.print_status(self.counter, self.total)
        hxs = HtmlXPathSelector(response)
        item = KennethItem()
        # Bound before the try block so the error handler can always report it.
        cur_url = response.url
        # main try for script, run general except if error happens in code
        # (send url on mail where it happened)
        try:
            # search for noResultsContent div on the page, if it exists keep
            # track that product doesn't exist on their page, otherwise
            # continue scraping page
            no_results = hxs.select('//div[@id="noResultsContent"]').extract()

            if not no_results:
                index = self.products['urls'].index(cur_url)
                cur_id = self.get_product_id(cur_url)
                product_id = self.products['product_ids'][index]
                page = hxs.select('//div[@id="mainContent"]').extract()
                page = " ".join(page)
                item['name'], item['description'] = self.get_basic_info(hxs)
                price, new_p, old_p = self.get_prices(hxs)
                if new_p:
                    item['new_price'] = new_p
                    item['old_price'] = old_p
                else:
                    item['price'] = price
                desc = basic.clean_string(item['description'][0])
                item['description'] = [desc]
                urls = self.get_color_image(hxs)
                new = self.get_image_server_path(urls, product_id)
                item['color_image_urls'] = new
                self.export(item['color_image_urls'], [product_id], "swatchImage")
                jsons, images = self.we_also_recommend(cur_id, product_id)
                item['product_page'] = [cur_url]
                item['product_id'] = [product_id]
                item['add_to_cart_id'] = [cur_id]
                item['recommended_product'] = jsons
                item['in_stock'] = ["IN_STOCK"]
                self.products['status'][index] = "ran"
                images_or_404 = self.get_colors(hxs, page, product_id)
                if images_or_404 == 404:
                    item['in_stock'] = ["NOT_AVAILABLE"]
                self.xml.create_xml(item)
                # image_urls is filled only after the xml entry was written.
                item['image_urls'] = []
                if images_or_404 != 404:
                    item['image_urls'] += images_or_404
                item['image_urls'] += urls
                item['image_urls'] += images
            else:
                # part for handling products that are not available
                cur_id = self.get_product_id(cur_url)
                cur_url = "http://www.kennethcole.com/product/index.jsp?"
                cur_url += "productId=" + str(cur_id)
                index = self.products['urls'].index(cur_url)
                self.products['status'][index] = "no_avail"
                item['product_id'] = [self.products['product_ids'][index]]
                if self.products['product_ids'][index]:
                    item['name'] = [self.products['names'][index]]
                else:
                    item['name'] = ["not available"]
                item['in_stock'] = ["NOT_AVAILABLE"]
                self.xml.create_xml(item)
                self.exc.code_handler(102, cur_url)
        except Exception:
            # part for catching errors and keeping track of numbers of
            # it and urls where it happened
            print("Error occured scraping this product")
            try:
                index = self.products['urls'].index(cur_url)
            except ValueError:
                # The url is not in the product list (often the very reason
                # for the failure); still report it instead of raising again.
                pass
            else:
                self.products['status'][index] = "error"
            self.exc.code_handler(100, cur_url)
        return item

    def no_url_products(self, no_url):
        """Write a NOT_AVAILABLE xml entry for every product that has no url.

        no_url is a dict with parallel 'product_ids' and 'names' lists.  Ids
        and names are paired by position, so a repeated product id still
        gets its own name.
        """
        item = KennethItem()
        for index, product_id in enumerate(no_url['product_ids']):
            item['product_id'] = [product_id]
            item['name'] = [no_url['names'][index]]
            item['in_stock'] = ['NOT_AVAILABLE']
            self.xml.create_xml(item)

    #function for getting basic product info from the page
    def get_basic_info(self, hxs):
        """Return (name, [description]) taken from the product page.

        name is the list of text nodes of the h1 inside "productInfoTop";
        the description is the first "productDescription" div passed through
        basic.cdata.  Raises IndexError when the description div is missing.
        """
        title_nodes = hxs.select('//div[@id="productInfoTop"]/h1/text()').extract()
        description_nodes = hxs.select('//div[@id="productDescription"]').extract()
        wrapped_description = basic.cdata(description_nodes[0])
        return title_nodes, [wrapped_description]

    # function for getting prices from the page: only one price, or the new
    # and the old one if the product is on sale
    def get_prices(self, hxs):
        """Return ([price], new_price_nodes, old_price_nodes) for the page.

        price is the first h2 text of "productInfoTop" with everything except
        digits, dots and commas removed; the other two values are the raw
        extracted lists.  Raises IndexError when the h2 is missing.
        """
        headline = hxs.select('//div[@id="productInfoTop"]/h2/text()').extract()[0]
        sale_nodes = hxs.select('//h2[@class="sale-now"]/text()').extract()
        regular_nodes = hxs.select('//span[@class="productGrey"]/text()').extract()
        return [re.sub('[^0-9.,]', '', headline)], sale_nodes, regular_nodes

    def get_color_image(self, hxs):
        """Return the src values of the images inside the "productInfoR2W" div."""
        swatch_sources = hxs.select('//div[@id="productInfoR2W"]/img/@src')
        return swatch_sources.extract()

    # function for getting colors from the javascript on the page and writing
    # them to the xml; it also calls the function that creates the size
    # subproducts
    def get_colors(self, hxs, page, main_id):
        """Write one xml entry per colour and return the colour image urls.

        Colours are parsed from the javascript that follows 'displays[0]' in
        page.  Returns 404 when that marker is missing.  Every colour becomes
        a subproduct "<main_id>_<n>"; afterwards get_sizes is called to
        create the size subproducts.
        """
        item = KennethItem()
        parts = page.split('displays[0]')
        if len(parts) < 2:
            print("This product is not available")
            return 404
        script = parts[1].split('</script>')[0]
        displays = script.split("};")
        ids = []
        images = []
        sizes_script = self.get_sizes_part_page(page)
        color_internal_code = {}

        # The piece after the last "};" is not a colour definition.
        for position, display in enumerate(displays[:-1]):
            color_id = basic.get_middle_text(display, 'colorId: "', '"')
            ids.append(color_id[0])
            view_images = []
            for view in range(1, display.count("Reg") + 1):
                key = "vw" + str(view)
                found = basic.get_middle_text(display, key + 'Reg: "', '"')
                if len(found) == 0:
                    # same key written without a space after the colon
                    found = basic.get_middle_text(display, key + 'Reg:"', '"')
                if len(found) > 0 and found[0] != "null":
                    view_images.append(found[0])

            if not view_images:
                # no per-colour images: fall back to the main product image
                view_images = hxs.select('//input[@name="productImage"]/@value').extract()
            variant_id = str(main_id) + "_" + str(position)
            item['product_id'] = [variant_id]
            item['color_option_id'] = color_id
            item['master_product_id'] = [main_id]
            item['normal_image_url'] = self.get_image_server_path(view_images, main_id)
            item['thumb_image_url'] = self.get_image_server_path_thumb(view_images, main_id)
            item['in_stock'] = ["NOT_IN_STOCK"]
            item['color'] = self.get_color_name(sizes_script, color_id[0])
            color_internal_code[color_id[0]] = str(position)
            self.xml.create_xml(item)
            images.extend(view_images)
            self.export(item['normal_image_url'], item['product_id'], "productImage")
        self.get_sizes(sizes_script, ids, main_id, color_internal_code)
        return images

    # function for getting sizes for products from javascript, and storing 
    # information in dicts of format {id : information}
    def get_sizes(self, page, ids, main_id, color_internal_code):
        """Collect sku, size, price and availability per colour id.

        page is the sizes javascript, split on "};".  Options whose 'cId'
        equals one of ids are grouped into dicts of the form
        {color_id: [values]}.  The size subproducts are then written via
        create_subproducts_xml.

        Returns (main_id, colors_name, sizes, skus, inStocks, prices);
        colors_name is always an empty dict here.
        """
        skus, colors_name, inStocks, sizes, prices = {}, {}, {}, {}, {}
        # The piece after the last "};" is not an option definition.
        for option in page.split("};")[:-1]:
            option_color = basic.get_middle_text(option, 'cId: "', '"')
            for color_id in ids:
                if option_color[0] != color_id:
                    continue
                raw_sku = basic.get_middle_text(option, 'sku: ', ',s')
                skus = self.add_to_dict(skus, color_id, re.sub("[^0-9]", "", raw_sku[0]))
                size = basic.get_middle_text(option, 'sDesc: "', '"')
                sizes = self.add_to_dict(sizes, color_id, size[0])
                raw_price = basic.get_middle_text(option, 'price: "', '"')
                cleaned = self.clean_price(raw_price[0])
                prices = self.add_to_dict(prices, color_id, cleaned[0])
                availability = basic.get_middle_text(option, 'avail: "', '"')
                inStocks = self.add_to_dict(inStocks, color_id, availability[0])
        self.create_subproducts_xml(main_id, color_internal_code, colors_name,
                                    sizes, skus, inStocks, prices)
        return main_id, colors_name, sizes, skus, inStocks, prices

    # function for creating subproducts for every size
    def create_subproducts_xml(self, main_id, color_internal_code, colors_name, sizes, skus, inStocks, prices):
        """Write one xml subproduct for every size of every colour.

        sizes, skus, inStocks and prices are dicts of parallel lists keyed by
        colour id.  A subproduct id is
        "<main_id>_<colour code>_<size position>".  "NOT_AVAILABLE" is
        written as NOT_IN_STOCK, "ADVANCED_SALE_LIMITED" as IN_STOCK, any
        other availability value is passed through unchanged.
        """
        for color_id, color_sizes in sizes.iteritems():
            item = KennethItem()
            for position, size in enumerate(color_sizes):
                item['size'] = [size]
                item['size_option_id'] = [skus[color_id][position]]
                parent_id = main_id + "_" + color_internal_code[color_id]
                item['master_product_id'] = [parent_id]
                item['product_id'] = [parent_id + "_" + str(position)]
                availability = inStocks[color_id][position]
                if availability == "NOT_AVAILABLE":
                    availability = "NOT_IN_STOCK"
                elif availability == "ADVANCED_SALE_LIMITED":
                    availability = "IN_STOCK"
                item['in_stock'] = [availability]
                item['price'] = [prices[color_id][position]]
                self.xml.create_xml(item)

    def add_to_dict(self, dict, index, value):
        """Append value to the list stored under dict[index] and return dict.

        A new one-element list is created when index is not present yet.
        Errors other than a missing key (for example an existing value that
        is not a list) are no longer swallowed.
        """
        dict.setdefault(index, []).append(value)
        return dict

    # function for getting we also recommend information about products from
    # their page, returns json list with information and images
    # list with images urls
    def we_also_recommend(self, id, main_id):
        """Fetch the "we also recommend" products for product id.

        Requests the recommendation service url built from id and parses
        every "certonaRecBoxes" section of the response.

        Returns (jsons, images): the json strings built by make_json and the
        list of the original image urls.
        """
        import urllib2
        request_url = "".join([
            "http://www.res-x.com/ws/r2/Resonance.aspx?appid=kennethcole01&t",
            "k=154212870918247&ss=525178103419747&sg=1&pg=897706724574618&b",
            "x=true&vr=2.67&sc=product_rr&ev=product&ei=", id, "&cu=&ct=k",
            "ennethcolec01&no=3&cb=r1eh&clk=&cv1=", id, "&cv23=63&ur=http%",
            "3A//www.kennethcole.com/product/index.jsp%3FproductId%3D3", id,
            "&plk=&rf=",
        ])
        response_body = urllib2.urlopen(request_url).read()
        sections = response_body.split("certonaRecBoxes")
        images, ids, names, prices, urls = [], [], [], [], []
        # parsing data got from the upper url about we also recommend
        # products; the text before the first marker holds no product
        for section in sections[1:]:
            product_id = [basic.get_middle_text(section, "d=", '\\"')[0]]
            image = basic.get_middle_text(section, 'src=\\"', '\\"')[0]
            name = basic.get_middle_text(section, 'alt=\\"', '\\"')
            price = basic.get_middle_text(section, '<br>', '</a>')
            product_url = "http://www.kennethcole.com/product/index.jsp?productId="
            product_url += product_id[0]
            urls.append(product_url)
            ids.append(product_id)
            names.append(name)
            prices.append(price)
            images.append(image)
        server_paths = self.get_image_server_path(images, main_id)
        jsons = self.make_json(ids, names, prices, server_paths, urls)
        return jsons, images

    # function for getting product id from the url
    def get_product_id(self, url):
        """Return the text between the first and the second "=" of url.

        Raises IndexError when url contains no "=".
        """
        pieces = url.split("=")
        return pieces[1]

    #function for making json
    def make_json(self, ids, names, prices, images, urls):
        """Build one cdata-wrapped json string per recommended product.

        ids, names and prices are lists of lists (the first element of each
        is used); images and urls are flat lists.  All five are read by
        position, driven by the length of ids.
        """
        jsons = []
        for position, id_holder in enumerate(ids):
            fragments = [
                "{", ' "id" : "', str(id_holder[0]), '", ',
                '"name" : "', str(names[position][0]), '", ',
                # insert function for storing the right image path
                '"image_url" : "', str(images[position]), '", ',
                '"product_url" : "', urls[position], '", ',
                '"price" : "', str(prices[position][0]), '" } ',
            ]
            jsons.append(basic.cdata("".join(fragments)))
        return jsons

    #function for getting javascript where sizes are handled
    def get_sizes_part_page(self, page):
        """Return the javascript between "availDates = new Array();" and "</script>".

        Raises IndexError when the marker is not on the page.
        """
        after_marker = page.split("availDates = new Array();")[1]
        return after_marker.split("</script>")[0]

    # function for getting name of the color by id
    def get_color_name(self, script, id):
        """Return [name] of the colour whose id appears in script.

        The name is the value of the last 'cDesc: "' entry found before the
        first occurrence of id.  When no such entry exists, the text before
        id up to its first double quote is returned instead.
        """
        before_id = script.split(id, 1)[0]
        # Only the closest preceding description belongs to this id.
        after_label = before_id.split('cDesc: "')[-1]
        name = after_label.split('"')[0]
        return [name]

    #function for exporting images to database via rest
    def export(self, images, id, tags):
        #set override to 0 for uploading images or else to skip uploading
        override = 1
        if override == 0:
            import MultipartPostHandler
            import urllib2
            import os
            url = 'http://api.admin.zmags.com/productImage/import?key=5ef90922-283b-4412-a1c8-3e70bc28b9d3'

            for i in range(0, len(images)):
                image_name = self.get_image_name(images[i])
                path = "images/kenneth_images/small/" + str(image_name)
                params = {'file': file(path, 'rb'), 'product_id': id[0],
                          'index': str(i + 1), 'tags': tags}
                          #token not working
                opener = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)
                code = opener.open(url, params).getcode()

                if (code != 202):
                    print ("Achtung")
                global images_number
                images_number += 1
                print images_number

                print "Image uploaded to product " + id[0]
        else:
            #print "Image upload overriden.."
            pass

    # function for getting full-size image paths on our server
    def get_image_server_path(self, urls, id):
#        print urls
        new = []
        for url in urls:
            temp = url.split("/")
            new.append(self.images_store + id + "/full/" + temp[len(temp) - 1])
        return new

    # function for getting image paths on our server
    def get_image_server_path_thumb(self, urls, id):
        new = []
        for url in urls:
            temp = url.split("/")
            new.append(self.images_store + id + "/small/" + temp[len(temp) - 1])
        return new

    def clean_price(self, price):
        """Return [price] with everything except digits, dots and commas removed."""
        digits_only = re.sub('[^0-9.,]', '', price)
        return [digits_only]

    def spider_closed(self, spider):
        """Handles spider_closed signal from end of scraping.

        Handles usual end operations for scraper like writing xml, exporting
        to database and sending appropriate mail message.

        When the database step fails, the xml, mail and log file fall back to
        the spider name as file name instead of stopping with a NameError.
        Errors from the xml export (CommonExport.xml_to_db) are not caught
        here.
        """
        msg = ""
        if self.counter < self.total:
            msg += "\nScraper didn't go through all products, please report"
        msg += "\n\nScraped {0} product out of {1}\n\n".format(self.counter, self.total)
        # filename for writing xml
        if self.d['database']:
            # Fallback name, replaced by the catalog name on success.
            filename = self.name
            try:
                self.database.connect()
                try:
                    filename = self.database.get_name(self.d['catalog_id'])
                    self.database.update_db(self.products)
                finally:
                    # Release the connection even when the update fails.
                    self.database.disconnect()
                msg += "\nRan from interface.\n"
            except Exception:
                msg += "\nUpdating database failed, please report."
        else:
            msg += "\nRan from console.\n"
            filename = self.d['file']
        self.xml.write_xml(self.name, filename)
        msg += self.exc.create_message(self.counter)
        if self.d['upload']:
            exp = CommonExport()
            exp.xml_to_db(self.name, filename, "29eac9ea-8c57-4d22-baf4-3f1471dc3ab6")
            msg += "\n\nExport to database successful"
        else:
            msg += "\n\nUpload to database not selected"
        from modules.mail import Mail
        mail = Mail()
        try:
            mail.send_mail(msg, "KennethCole: {0}".format(filename))
            if self.d['email']:
                mail.send_mail(msg, "KennethCole: {0}".format(filename), self.d['email'])
        except Exception:
            # Only the log file written below can still show this line.
            msg += "\nSending mail failed."
        if self.d['database']:
            path = 'logs/{0}'.format(self.name)
            if not os.path.exists(path):
                os.makedirs(path)
            with open("{0}/{1}".format(path, filename), 'w') as f:
                f.write(msg)

    def get_lists_from_excel(self):
        """Fill self.products and self.no_urls from the excel file in self.d['file'].

        Urls, product ids and names are read from columns 2, 0 and 1 (second
        argument 2 in every call).  Read failures are reported through
        self.exc.code_handler with code 103; processing then continues with
        whatever was read.
        """
        excel_path = basic.get_excel_path(self.name, self.d['file'])
        xls = DictExcel(excel_path)
        self.products = {}
        try:
            self.products['urls'] = xls.read_excel_collumn_for_urls(2, 2)
            self.products['product_ids'] = xls.read_excel_collumn_for_ids(0, 2)
            self.products['names'] = xls.read_excel_collumn(1, 2)
        except IOError as e:
            # IOError is handled first because it is also a StandardError.
            details = ["I/O error {0}: {1}".format(e.errno, e.strerror),
                       "\nError occurred for given file: {0}".format(self.d['file'])]
            self.exc.code_handler(103, msg="".join(details))
        except StandardError:
            details = ["Error reading excel file",
                       "\nError occurred for given file: {0}".format(self.d['file'])]
            self.exc.code_handler(103, msg="".join(details))
        self.products = xls.delete_duplicates_dict(self.products)
        with_urls, without_urls = xls.separate_no_urls(self.products)
        self.products, self.no_urls = with_urls, without_urls
        self.products = xls._add_none_status(self.products)
        self.no_urls = xls._add_none_status(self.no_urls)

    def add_properties(self, xml):
        """Register this spider's custom properties on the given xml object.

        Each entry is passed to xml.add_property as (name, label, type), in
        the order listed.
        """
        custom_properties = (
            ("add_to_cart_id", "Add To Cart Id", "text"),
            ("product_page", "Product page", "text"),
            ("color_image_urls", "Color Image URLs", "text_list"),
            ("color_option_id", "Color Option ID", "text"),
            ("recommended_product", "Recommended Product", "text_list"),
            ("size_option_id", "Size Option ID", "text"),
            ("in_stock", "In Stock", "text"),
            ("old_price", "Old Price", "text"),
            ("new_price", "New Price", "text"),
        )
        for key, label, kind in custom_properties:
            xml.add_property(key, label, kind)