Code Example #1
File: fmtcload.py Project: beforebeta/pennywyse
def refresh_merchants():
    section("Loading Merchants")
    fh = _download_content("http://services.formetocoupon.com/getMerchants?key=%s" % settings.FMTC_ACCESS_KEY,
                               "Merchants_Content_%s" % datetime.datetime.now().strftime(DATETIME_FORMAT))
    data = etree.iterparse(fh, tag='merchant')
    for event, merchant in data:
        try:
            name = unescape_html(merchant.find("name").text)
            id = merchant.find("id").text
            print "\t%s,%s" % (id,name)
            print '=' * 40
            link = merchant.find('link').text
            skimlinks = merchant.find('skimlinks').text
            homepageurl = merchant.find('homepageurl').text
            model, created = Merchant.objects.get_or_create(name=name)
            model.name = name.strip()
            model.directlink = homepageurl
            model.skimlinks = skimlinks
            model.link = homepageurl
            model.save()
            affiliate_data, created = MerchantAffiliateData.objects.get_or_create(ref_id=id, merchant=model)
            affiliate_data.network = merchant.find('network').text
            affiliate_data.networkid = merchant.find('networkid').text
            affiliate_data.networknote = merchant.find('networknote').text
            affiliate_data.link = link
            if merchant.find('network').text == 'CJ':
                affiliate_data.primary = True
            affiliate_data.save()
        except:
            print_stack_trace()
Code Example #2
File: sqootload.py Project: beforebeta/pennywyse
def go_validate((coupon_model, last_validate_end_time, firsttime, pulseonly)):
    from core.signals import update_object
    try:
        print show_time(), coupon_model.directlink

        sqoot_url = coupon_model.directlink
        is_bad_link, response = fetch_page(sqoot_url)
        if is_bad_link:
            coupon_model.status='confirmed-inactive'
            coupon_model.save()
            handle_exceptions(update_object.send(sender=Coupon, instance=coupon_model))
            return

        is_deal_dead = check_if_deal_dead(coupon_model, response, sqoot_url)
        if is_deal_dead:
            coupon_model.status='confirmed-inactive'
        else:
            coupon_model.status='considered-active'

        coupon_model.save()
        handle_exceptions(update_object.send(sender=Coupon, instance=coupon_model))
        reset_db_queries()

        # Note: Commenting out address/category correction logic (not implemented yet)
        # if firsttime:
        #     confirm_or_correct_deal_data(coupon_model, response)
        # else:
        #     if pulseonly:
        #         return
        #     if last_validate_end_time and (last_validate_end_time > coupon_model.date_added):
        #         return # Data check only the newly added deals.
        #     confirm_or_correct_deal_data(coupon_model, response)
    except:
        print_stack_trace()
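The tuple-style signature above is Python 2 tuple-parameter unpacking, which suggests go_validate is fed single-tuple work items, presumably via a multiprocessing pool. A sketch under that assumption (the pool size and work-item values are hypothetical, not from the source):

    from multiprocessing import Pool

    # Hypothetical driver, assuming go_validate is used as a pool worker:
    pool = Pool(processes=4)
    work_items = [(c, None, True, False) for c in Coupon.all_objects.all()]
    pool.map(go_validate, work_items)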
Code Example #3
File: models.py Project: beforebeta/pennywyse
 def create_from_skimlinks_commissions(self, commissions):
     default_to_empty_string = lambda x: "" if x is None else x
     for c in commissions["skimlinksAccount"]["commissions"].keys():
         try:
             commission = commissions["skimlinksAccount"]["commissions"][c]
             if self.filter(commissionID = commission["commissionID"]).count() > 0:
                 continue #commission already recorded
             comm = Commission(
                 commissionID        = commission["commissionID"],
                 commissionType      = "skimlinks",
                 commissionValue     = float(commission["commissionValue"])/100, # values come in cents - we convert to dollars
                 orderValue          = float(commission["orderValue"])/100, # values come in cents - we convert to dollars
                 currency            = default_to_empty_string(commission["currency"]),
                 customID            = default_to_empty_string(commission["customID"]),
                 date                = datetime.strptime(commission["date"],"%Y-%m-%d").date(),
                 domainID            = default_to_empty_string(commission["domainID"]),
                 merchantID          = default_to_empty_string(commission["merchantID"]),
                 publisherID         = default_to_empty_string(commission["publisherID"]),
                 items               = int(commission["items"]) if commission["items"] is not None else 0,
                 sales               = int(commission["sales"]) if commission["sales"] is not None else 0,
                 remoteReferer       = default_to_empty_string(commission["remoteReferer"]),
                 remoteUserAgent     = default_to_empty_string(commission["remoteUserAgent"]),
                 url                 = default_to_empty_string(commission["url"]),
                 domain              = default_to_empty_string(shorten_to_domain(commission["url"]) if commission["url"] else ""),
                 status              = default_to_empty_string(commission["status"])
             )
             comm.save()
         except:
             print json.dumps(commissions["skimlinksAccount"]["commissions"][c], indent=4)
             print_stack_trace()
Code Example #4
File: models.py Project: beforebeta/pennywyse
 def save(self, *args, **kwargs):
     super(Visitor, self).save(*args, **kwargs)
     try:
         if self.rev_visitor.all().exists():
             for rv in self.rev_visitor.all():
                 rv.save()
     except:
         print_stack_trace()
Code Example #5
File: views.py Project: beforebeta/pennywyse
def _remove_skimlinks(skimlinked_url):
    try:
        parsed = urlparse.urlparse(skimlinked_url)
        query = parsed.query.replace('&amp;', '&')
        return urlparse.parse_qs(query)["url"][0]
    except:
        print_stack_trace()
        return skimlinked_url
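A self-contained sketch (Python 2) of the unwrapping above, with a hypothetical wrapper URL; the query string is assumed to arrive HTML-escaped, which is why the &amp; sequences are unescaped first:

    import urlparse

    # Hypothetical skimlinks-wrapped URL with an HTML-escaped query string:
    wrapped = "http://go.redirectingat.com/?id=123&amp;url=http%3A%2F%2Fexample.com%2Fdeal"
    query = urlparse.urlparse(wrapped).query.replace('&amp;', '&')
    print urlparse.parse_qs(query)["url"][0]  # -> http://example.com/deal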
Code Example #6
File: fmtcload.py Project: beforebeta/pennywyse
def refresh_deals():
    section("Loading Deals/Coupons")
    fh = _download_content("http://services.formetocoupon.com/getDeals?key=%s" % settings.FMTC_ACCESS_KEY,
                           "Deals_Content_%s" % datetime.datetime.now().strftime(DATETIME_FORMAT))
    data = etree.iterparse(fh, tag='item')
    for event, deal in data:
        try:
            id = deal.find('couponid').text
            coupon, created = Coupon.active_objects.get_or_create(ref_id=id)
            if not created: continue
            merchant_name = deal.find('merchantname').text
            merchantid = deal.find('merchantid').text
            merchant, created = Merchant.objects.get_or_create(name=merchant_name)
            coupon.merchant=merchant

            coupon.categories.clear()
            for category in deal.find("categories"):
                coupon.categories.add(Category.objects.get(code=category.text, ref_id_source__isnull=True))
            coupon.dealtypes.clear()
            dealtypes = deal.find('dealtypes')
            for dealtype in dealtypes.findall("type"):
                coupon.dealtypes.add(DealType.objects.get(code=dealtype.text))

            coupon.description = unescape_html(deal.find('label').text)
            restrictions = deal.find('restrictions').text or ''
            coupon.restrictions = unescape_html(restrictions)
            coupon_code = deal.find('couponcode').text or ''
            coupon.code = unescape_html(coupon_code)

            coupon.start = get_dt(deal.find('startdate').text)
            coupon.end = get_dt(deal.find('enddate').text)
            coupon.lastupdated = get_dt(deal.find('lastupdated').text)
            coupon.created = get_dt(deal.find('created').text)
            coupon.link = deal.find('link').text

            # removing skimlinks prefix from coupon link
            coupon.link = extract_url_from_skimlinks(deal.find('link').text)
            
            coupon.directlink = deal.find('directlink').text
            coupon.skimlinks = deal.find('skimlinks').text
            coupon.status = deal.find('status').text

            coupon.countries.clear()

            for country in deal.findall("country"):
                c, created = Country.objects.get_or_create(code=country.text)
                c.name = country.text
                c.save()
                coupon.countries.add(c)

            coupon.price = deal.find('price').text
            coupon.discount = deal.find('discount').text
            coupon.listprice = deal.find('listprice').text
            coupon.percent = deal.find('percent').text
            coupon.image = deal.find('image').text
            coupon.save()
        except:
            print_stack_trace()
Code Example #7
File: models.py Project: beforebeta/pennywyse
 def get_retailer_link(self):
     """retrieves the direct link to the page"""
     try:
         parsed = urlparse.urlparse(self.skimlinks)
         query = parsed.query.replace('&amp;', '&')
         return urlparse.parse_qs(query)["url"][0]
     except:
         print_stack_trace()
         return self.skimlinks
Code Example #8
File: sqootutils.py Project: beforebeta/pennywyse
def write_sqoot_log(finished_stage, start_time, end_time):
    time_took = end_time - start_time

    try:
        with open(SQOOT_LOG_PATH, 'a') as csvfile:
            log_writer = csv.writer(csvfile)
            log_writer.writerow([finished_stage, start_time, end_time, time_took.seconds/60,])
    except:
        print_stack_trace()
        print "^-- WARNING: Problem logging it: {},{},{},{}"\
                .format(finished_stage, start_time, end_time, time_took.seconds/60)
Code Example #9
File: models.py Project: beforebeta/pennywyse
    def create_short_desc(self):
        try:
            short = self.description.lower()
            if not short:
                return "coupon"
            arr = short.split(" ")

            try:
                if "% off" in short:
                    for i in range(len(arr)):
                        if arr[i].startswith("off"):
                            break
                    return " ".join([arr[i-1], "off"])
            except:
                pass

            try:
                if self.has_deal_type("percent"):
                    for i in range(len(arr)):
                        if arr[i].endswith("%"):
                            return "%s off" % arr[i]
            except:
                pass

            try:
                if self.has_deal_type("dollar"):
                    for i in range(len(arr)):
                        if arr[i].startswith("$"):
                            return "%s off" % arr[i]
            except:
                pass

            try:
                if self.discount and self.discount > 0:
                    return "$%s off" % int(self.discount)
            except:
                pass

            if self.has_deal_type("gift"):
                return "gift"

            if self.has_deal_type("sale"):
                return "sale"

            if self.has_deal_type("offer"):
                return "offer"

            if self.has_deal_type("freeshipping") or self.has_deal_type("totallyfreeshipping"):
                return "free ship"
        except:
            print self.ref_id, "Description is", self.description
            print_stack_trace()
        return "coupon"
Code Example #10
File: fmtcload.py Project: beforebeta/pennywyse
def embedly(args):
    _from = 0
    _to = Merchant.objects.all().count()
    if len(args) == 2:
        _from = int(args[0])
        _to = int(args[1])
        if _to == 1:
            _to = Merchant.objects.all().count()
    print "loading from", _from, "to", _to
    for merchant in Merchant.objects.all()[_from:_to]:
        try:
            EmbedlyMerchant(merchant).update_coupons()
        except:
            print_stack_trace()
Code Example #11
File: fmtcload.py Project: beforebeta/pennywyse
def refresh_calculated_fields():
    section("Refresh Calculated Fields")
    for m in Merchant.objects.all():
        print 'Calculating coupons for %s' % m.name
        try:
            m.refresh_coupon_count()
        except:
            print "Error with: ", m.name, m.id
            print_stack_trace()

    regex = r'/o/(?P<coupon_id>[\d]+)/$'
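    # e.g. a hypothetical matching ClickTrack.target_url: "http://example.com/o/12345/"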
    for ct in ClickTrack.objects.filter(coupon__isnull=True):
        print 'Processing click track %s' % ct.id
        r = re.search(regex, ct.target_url)
        if r:
            try:
                coupon = Coupon.objects.get(pk=r.groups()[0])
                ct.coupon = coupon
            except Coupon.DoesNotExist:
                ct.coupon = None
            ct.save()
    for ct in ClickTrack.objects.filter(merchant__isnull=True):
        print 'Processing click track %s' % ct.id
        if ct.coupon:
            ct.merchant = ct.coupon.merchant
            ct.save()

    tracks = ClickTrack.objects.exclude(coupon__isnull=True).values('coupon_id')\
                                                            .annotate(popularity=Count('coupon__id'))
    for track in tracks:
        Coupon.objects.filter(id=track['coupon_id']).update(
            popularity=track['popularity'])

    tracks = ClickTrack.objects.exclude(merchant__isnull=True).values('merchant_id')\
                                                            .annotate(popularity=Count('merchant__id'))
    for track in tracks:
        print 'Processing click track %s' % track['merchant_id']
        Merchant.objects.filter(id=track['merchant_id']).update(
            popularity=track['popularity'])

    for c in Coupon.objects.filter(coupon_type__isnull=True).only(
            'id', 'categories', 'dealtypes'):
        print 'Calculating coupon type for coupon %s' % c.id
        c.coupon_type = c.get_coupon_type()
        c.save()

    Coupon.objects.exclude(
        Q(end__gt=datetime.datetime.now())
        | Q(end__isnull=True)).update(is_active=False)
Code Example #12
File: sqootlegacy.py Project: beforebeta/pennywyse
def prepare_list_of_deals_to_scrub():
    start_time = time.time()
    deals_to_scrub = Coupon.all_objects.filter(
        pk__in=SCRUB_LIST).order_by('merchant__name')

    probably_dup_deals_list = [
    ]  # List of coupon pks that look like a duplicate.
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub,
                                                  probably_dup_deals_list,
                                                  'coupon_directlink')
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub,
                                                  probably_dup_deals_list,
                                                  'merchant_name')
    probably_dup_deals_list = list(set(probably_dup_deals_list))

    print "merchant_pk^merchant_ref_id^merchant_name^address^locality^region^postal_code^coupon_pk^coupon_ref_id^coupon_title^coupon_short_title^parent_category^child_category^deal_price^deal_value^provider^link^is_duplicate?"

    for d in deals_to_scrub:
        categories = d.categories.all()
        parent_category = [cat for cat in categories if cat.parent == None]
        parent_category = parent_category[0].name if parent_category else None
        child_category = [cat for cat in categories if cat.parent != None]
        child_category = child_category[0].name if child_category else None

        address = d.merchant_location.address if d.merchant_location.address else ""
        locality = d.merchant_location.locality if d.merchant_location.locality else ""
        region = d.merchant_location.region if d.merchant_location.region else ""
        postal_code = d.merchant_location.postal_code if d.merchant_location.postal_code else ""

        if d.pk in probably_dup_deals_list:
            is_duplicate = 1
        else:
            is_duplicate = 0

        try:
            print "%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s^%s" %\
                  (d.merchant.pk, d.merchant.ref_id, d.merchant.name.lower(), address, locality, region, postal_code, d.pk, d.ref_id, d.embedly_title,\
                   d.embedly_description, parent_category, child_category, d.price, d.listprice, d.coupon_network.name, d.directlink, is_duplicate)
        except:
            print "!!!ERROR: merchant_pk == {}".format(d.merchant.pk)
            print_stack_trace()
            continue

    end_time = time.time()
    time_elapsed = end_time - start_time
    print time_elapsed
Code Example #13
File: mobile_api.py Project: beforebeta/pennywyse
 def create_localinfo_index_if_doesnt_exist(self):
     if not self.es.indices.exists(index='localinfo'):
         try:
             settings_and_mappings = {
                 "mappings": {
                     "populars": {
                         "properties": {
                             "user_uuid": {"type": "string"},
                             "location": {"type": "geo_point"},
                             "search_keyword": {"type": "string"},
                             "search_category": {"type": "string"}
                         }
                     }
                 }
             }
             self.es.indices.create(index='localinfo', body=settings_and_mappings)
         except:
             print_stack_trace()
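Once the index exists, "populars" documents can be indexed against the mapping above. A minimal usage sketch (the client setup and field values are assumptions; the doc_type argument matches the pre-5.x elasticsearch-py API implied by the "string" field types):

    from elasticsearch import Elasticsearch

    es = Elasticsearch()  # assumption: default localhost:9200 node
    es.index(index='localinfo', doc_type='populars', body={
        'user_uuid': 'hypothetical-uuid-1234',
        'location': {'lat': 40.71, 'lon': -74.00},  # geo_point as a lat/lon dict
        'search_keyword': 'pizza',
        'search_category': 'restaurants',
    })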
Code Example #14
File: subscriptions.py Project: beforebeta/pennywyse
def email_subscribe(request):
    try:
        e = EmailSubscription()
        e.app = request.POST["app"]
        e.email = request.POST["email"]
        e.session_key = request.POST["session_key"]
        try: e.first_name  = request.POST["first_name"]
        except: pass
        try: e.last_name  = request.POST["last_name"]
        except: pass
        try: e.full_name  = request.POST["full_name"]
        except: pass
        try: e.context = request.POST["context"]
        except: pass
        e.save()
        return HttpResponse('1')
    except:
        print_stack_trace()
Code Example #15
File: sqootutils.py Project: beforebeta/pennywyse
def fetch_page(sqoot_url, tries=1):
    '''
    Summary: Check if url is valid and return a boolean with a response.
    '''
    try:
        if not sqoot_url:
            return True, None
        response = requests.get(sqoot_url, timeout=5)
        if response.status_code != 200:
            return True, None
        return False, response
    except Exception, e:
        print_stack_trace()
        print "^---- Offending URL: ", sqoot_url
        if tries < 3:
            print "Retrying in 5 seconds, maybe the server just needs a break"
            time.sleep(5)
            return fetch_page(sqoot_url, tries+1)
        else:
            raise  # re-raise, preserving the original traceback
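A short usage sketch (the URL is hypothetical); the (is_bad_link, response) pair is consumed the same way by go_validate in Code Example #2:

    is_bad_link, response = fetch_page("http://deals.example.com/offer/123")
    if not is_bad_link:
        print response.status_code  # 200 for a live page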
Code Example #16
File: utils.py Project: beforebeta/pennywyse
def get_visitor_tag(url, visitor_id):
    from core.util import print_stack_trace
    try:
        if 'go.redirectingat.com' in url:
            parsed = urlparse(url)
            query_dict = parse_qs(parsed.query)
            for key in query_dict.keys():
                query_dict[key] = query_dict[key][0]
            query_dict['xcust'] = visitor_id
            url = query_dict['url']
            del query_dict['url']
            return 'http://go.redirectingat.com/?%s&%s' % (
                urllib.urlencode(query_dict), urllib.urlencode({'url': url}))
        else:
            return url
    except:
        print_stack_trace()
        return url
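For illustration, the rewrite above injects the visitor id as xcust and moves the wrapped url parameter to the end (the input URL is hypothetical; parameter order in the output may vary with dict ordering):

    # input : http://go.redirectingat.com/?id=123&url=http%3A%2F%2Fshop.example%2F
    # output: http://go.redirectingat.com/?id=123&xcust=42&url=http%3A%2F%2Fshop.example%2F
    print get_visitor_tag("http://go.redirectingat.com/?id=123&url=http%3A%2F%2Fshop.example%2F", 42)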
Code Example #17
File: sqootutils.py Project: beforebeta/pennywyse
def read_sqoot_log(current_stage):
    row_to_lookup, column_to_lookup = LOOKUP_PER_STAGE[current_stage]

    try:
        f = open(SQOOT_LOG_PATH, 'r')
    except IOError:
        print_stack_trace()
        return None

    all_rows = f.readlines()
    if len(all_rows) == 1:
        f.close()
        return None
    else:
        last_ten_rows = all_rows[-10:]
        latest_runs_of_this_step = [r for r in last_ten_rows if r.replace('\r\n', '').split(',')[0] == row_to_lookup]
        if len(latest_runs_of_this_step) == 0:
            f.close()
            return None
        very_last_run = latest_runs_of_this_step[-1]
        timestamp_string = very_last_run.replace('\r\n', '').split(',')[column_to_lookup]
        timestamp_wanted = parse(timestamp_string)
        f.close()
        return timestamp_wanted
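For reference, the rows parsed here are the comma-separated lines appended by write_sqoot_log (Code Example #8); a parsing sketch with a hypothetical row, assuming parse() above is dateutil's parser:

    from dateutil.parser import parse

    row = "savedown,2014-05-01 12:00:00,2014-05-01 12:30:00,30\r\n"
    fields = row.replace('\r\n', '').split(',')
    print parse(fields[2])  # -> 2014-05-01 12:30:00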
Code Example #18
File: sqootlegacy.py Project: beforebeta/pennywyse
 def handle(self, *args, **options):
     if options['savedown']:
         try:
             savedown_sqoot_data()
         except:
             print_stack_trace()
     if options['analyze']:
         try:
             analyze_sqoot_deals()
         except:
             print_stack_trace()
     if options['scrubprepare']:
         try:
             prepare_list_of_deals_to_scrub()
         except:
             print_stack_trace()
     if options['scrubexecute']:
         try:
             read_scrub_list_and_update(args)
         except:
             print_stack_trace()
Code Example #19
File: sqootlegacy.py Project: beforebeta/pennywyse
def read_scrub_list_and_update(args):
    try:
        filename = args[0]
    except IndexError:
        print_stack_trace()
        return

    # Thomas' Bing Geocoder api key (free basic access)
    # dotus_geocoder  = geocoders.GeocoderDotUS() for consideration as a fallback
    bing_geocoder = geocoders.Bing(
        'AvxLEwiPhVJzf0S3Pozgg01NnUQQX0RR6g9K46VPLlZ8OfZkKS-76gaPyzoV6IHI')

    path = os.path.join(settings.BASE_DIR, 'readonly', filename)
    try:
        f = open(path)
    except IOError:
        print_stack_trace()
        return

    rows = []
    for row in f:
        rows.append(row.replace("\r\n", "").split("\t"))

    for row in rows[1:]:  # Skip the header
        try:
            coupon_pk = int(row[1])
            is_duplicate = True if row[3] == '1' else False
            is_inactive = True if row[4] == '1' else False
            is_category_wrong = True if row[5] == '1' else False
            is_location_wrong = True if row[8] == '1' else False
            correction_needed = is_duplicate or is_inactive or is_category_wrong or is_location_wrong

            if correction_needed:
                coupon_obj = Coupon.all_objects.get(pk=coupon_pk)
                if is_duplicate:
                    coupon_obj.is_duplicate = True
                    # print "Correction: ", coupon_pk, " is_duplicate=True" #DEBUG
                if is_inactive:
                    coupon_obj.status = 'confirmed-inactive'
                    # print "Correction: ", coupon_pk, " status=confirmed-inactive" #DEBUG
                if is_category_wrong:
                    coupon_obj.categories.clear()
                    try:
                        parent_category = Category.objects.get(
                            ref_id_source='sqoot', name=row[6])
                        coupon_obj.categories.add(parent_category)
                        # print "Correction: ", coupon_pk, " Parent category -> ", parent_category.name #DEBUG
                    except:
                        pass

                    try:
                        child_category = Category.objects.get(
                            ref_id_source='sqoot', name=row[7])
                        coupon_obj.categories.add(child_category)
                        # print "Correction: ", coupon_pk, " Child category -> ", child_category.name #DEBUG
                    except:
                        pass
                if is_location_wrong:
                    location_obj = coupon_obj.merchant_location
                    address = row[9]
                    locality = row[10]
                    region = row[11]
                    postal_code = row[12]
                    spacer1 = ', ' if address != '' else ''
                    spacer2 = ' ' if locality != '' else ''
                    lookup_text = address + spacer1 + locality + spacer2 + region

                    try:
                        place, (lat, lng) = bing_geocoder.geocode(lookup_text)
                        pnt = 'POINT({} {})'.format(lng, lat)
                        location_obj.geometry = pnt
                    except:
                        pass

                    location_obj.address = address if address != '' else location_obj.address
                    location_obj.locality = locality if locality != '' else location_obj.locality
                    location_obj.region = region if region != '' else location_obj.region
                    location_obj.postal_code = postal_code if postal_code != '' else location_obj.postal_code
                    location_obj.save()
                    # print "Correction: ", coupon_pk, " Location fixed" #DEBUG
                coupon_obj.save()
        except:
            print_stack_trace()

    scrub_list_retrieved = [
        row[1] for row in rows[1:]
    ]  # list of original coupon pks imported from 'scrub_list.py'
    deals_to_scrub = Coupon.all_objects.filter(pk__in=scrub_list_retrieved)\
                                       .exclude(Q(status='confirmed-inactive') | Q(status='implied-inactive') | Q(is_duplicate=True))\
                                       .order_by('merchant__name')

    probably_dup_deals_list = [
    ]  # List of coupon pks that look like a duplicate.
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub,
                                                  probably_dup_deals_list,
                                                  'coupon_directlink')
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub,
                                                  probably_dup_deals_list,
                                                  'merchant_name')
    probably_dup_deals_list = list(set(probably_dup_deals_list))

    for pk in probably_dup_deals_list:
        try:
            coupon = Coupon.all_objects.get(pk=pk)
            coupon.is_duplicate = True
            coupon.save()
            # print "Correction: ", coupon_pk, " is_duplicate=True" #DEBUG
        except:
            print_stack_trace()
Code Example #20
File: image.py Project: beforebeta/pennywyse
def _get_image(user, image_url, specific_height=-1, specific_width=-1):
    """
        Returns the image at the image_url of the specific height and width
        if image is not locally buffers, then it downloads the original image at the url
        if specific height and width are set, it also creates a resized version of the image
    """
    def _download_image_to_local(src_image_url, src_image_pointer, height, width):
        prefix,ext = os.path.splitext(urlparse.urlparse(src_image_url).path)
        prefix = prefix.replace('/', '').replace('\\', '')
        prefix = url2path(prefix)

        filename = '%s_%s%s' % (uuid.uuid4().hex, uuid.uuid4().hex, ext)
        local_copy = os.path.join(settings.IMAGE_LOCAL_COPY_DIR, filename)
        local_url = os.path.join(settings.IMAGE_LOCAL_COPY_DIR_NO_PREFIX, filename)
        file_saved_path = local_copy

        if height == -1:
            #ensure exists
            assert src_image_pointer.status_code == 200

            #ensure is image!
            content_type = src_image_pointer.headers['content-type'].lower()

            if content_type[:5] != 'image':
                #cloudfront servers typically hosting images as octet-streams
                #we need to handle that
                assert 'application/octet-stream' in content_type
                #also in these cases we don't allow files greater than 700KB -> 716800 = 700*1024
                assert int(src_image_pointer.headers.get('content-length', '0')) < 716800

            if src_image_pointer.status_code == 200:
                #download remote file
                file_saved_path = local_copy
                local_copy = open(local_copy, 'wb')
                local_copy.write(src_image_pointer.content)
                local_copy.close()
            else:
                raise Http404()
        else:
            img_util.resize(src_image_pointer, (specific_width, specific_height), True, local_copy)

#        s3_url = s3.upload(file_saved_path)

        #store reference in imagestore
#        img = ImageStore(remote_url=image_url, local_url=s3_url, source_user=user, height=height, width=width)
        img = ImageStore(remote_url=image_url, local_url="/%s" % local_url, source_user=user, height=height, width=width)
        img.save()
        return img

    def _download_temp_image(src_image_url, src_image_pointer, height, width):
        prefix,ext = os.path.splitext(urlparse.urlparse(src_image_url).path)
        prefix = prefix.replace('/', '').replace('\\', '')
        prefix = url2path(prefix)

        filename = '%s_%s%s' % (uuid.uuid4().hex, uuid.uuid4().hex, ext)
        local_copy = os.path.join(settings.IMAGE_LOCAL_COPY_DIR, filename)
        local_url = os.path.join(settings.IMAGE_LOCAL_COPY_DIR_NO_PREFIX, filename)

        if height == -1:
            #ensure exists
            assert src_image_pointer.status_code == 200

            #ensure is image!
            content_type = src_image_pointer.headers['content-type'].lower()

            if content_type[:5] != 'image':
                #cloudfront servers typically hosting images as octet-streams
                #we need to handle that
                assert 'application/octet-stream' in content_type
                #also in these cases we don't allow files greater than 700KB -> 716800 = 700*1024
                assert int(src_image_pointer.headers.get('content-length', '0')) < 716800

            if src_image_pointer.status_code == 200:
                #download remote file
                local_copy = open(local_copy, 'wb')
                local_copy.write(src_image_pointer.content)
                local_copy.close()
            else:
                raise Http404()

        return local_url

    if user.is_anonymous() or not(user.is_authenticated()):
        try:
            user = User.objects.get(username=IMAGE_ANONYMOUS_USER)
        except:
            print_stack_trace()
            user = User.objects.create_user(username=IMAGE_ANONYMOUS_USER, email='*****@*****.**', password=IMAGE_ANONYMOUS_USER)

    image_url = urllib.unquote_plus(image_url)

    original_image = None

    #check if image already exists
    image_url_to_check = image_url
    if ShortenedURL.objects.should_shorten_url(image_url):
        image_url_to_check = ShortenedURL.objects.shorten_url(image_url).shortened_url

    for image in ImageStore.objects.filter(remote_url = image_url_to_check):
        if image.height == specific_height and image.width == specific_width:
            #found the image with the exact dimensions
            return image
        if image.height == image.width == -1:
            #found the original image
            original_image = image

    if not original_image:
        #original image not available
        #download original image
        original_image = _download_image_to_local(image_url, requests.get(image_url), -1, -1)
    else:
        original_image.local_url = _download_temp_image(image_url, requests.get(image_url), -1, -1)

    if specific_height == specific_width == -1:
#        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, original_image.local_url[1:].split('/')[-1]))
        return original_image
    else:
        #required image is not available
        #resize original image to required image
        resized_image = _download_image_to_local(image_url, os.path.join(settings.IMAGE_LOCAL_COPY_DIR, original_image.local_url[1:].split('/')[-1]), specific_height, specific_width)
#        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, resized_image.local_url[1:].split('/')[-1]))
#        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, original_image.local_url[1:].split('/')[-1]))
        return resized_image
Code Example #21
File: visitor.py Project: beforebeta/pennywyse
    def process_request(self, request):
        # don't process AJAX requests
        if request.path.startswith("/s/") or request.path.startswith("/static/") or request.path.startswith("/admin/")\
             or request.path.startswith("/favicon.ico") or (request.is_ajax() and not request.path.startswith('/o/')):
            return
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }
        
        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff
                )
    
                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
            except:
                return
            
            request.session['visitor_id'] = visitor_id = visitor.id

        redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                    visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))
            
            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                #update the tracking info with the latest and bump the old one to be stored in the history
                
                #visitor.bump_past_acquisition_info()
                past_acquisition_info = visitor_data.get('past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING.keys():
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data['past_acquisition_info'] = past_acquisition_info
                for k,v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except:
            print_stack_trace()
        redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
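
A note on the GA __utmz parsing above: the cookie is split on the first four dots and the utm key/value pairs are pulled out of the remaining payload with a regex. A minimal standalone sketch of that step (the cookie value below is hypothetical):

import re

sample = '12345.1400000000.1.1.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=coupons'
# keep only the utm payload after the first four dot-separated fields
payload = sample.split('.', 4)[-1]
data = dict(match.groups() for match in re.finditer(
    r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', payload))
print data.get('utmcsr')  # google
print data.get('utmctr')  # coupons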
Code example #39
0
File: coupon_extras.py Project: beforebeta/pennywyse
def assign_visitor_tag(context, url):
    try:
        return get_visitor_tag(url, context['visitor'].id)
    except:
        print_stack_trace()
        return url
Code example #40
0
def click_track(request, clicked_link_path=None):
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link = clicked_link.lower()
        except:
            print_stack_trace()

        source_url_type = 'landing'
        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        coupon = None
        merchant = None

        if "/coupon/" in clicked_link:
            #skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2]  #assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack()
        click_track.visitor = visitor
        click_track.user_agent = visitor.user_agent[:255]
        click_track.referer = referer[:255]
        click_track.target_url = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url = source_url[:255]
        click_track.merchant = merchant
        click_track.coupon = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
    return success()
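
The _remove_skimlinks helper is not included in this listing. A plausible sketch, assuming the go.redirectingat.com link carries the real destination in a 'url' query parameter (that parameter name is an assumption, not confirmed by the source):

import urlparse

def _remove_skimlinks(target_url):
    # assumption: the wrapped destination sits in a 'url' query parameter;
    # fall back to the original URL when it is absent
    query = urlparse.parse_qs(urlparse.urlparse(target_url).query)
    return query.get('url', [target_url])[0]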
Code example #41
0
File: sqootlegacy.py Project: beforebeta/pennywyse
def read_scrub_list_and_update(args):
    try:
        filename = args[0]
    except IndexError:
        # bail out early if no filename argument was supplied
        print_stack_trace()
        return

    # Thomas' Bing Geocoder api key (free basic access)
    # dotus_geocoder  = geocoders.GeocoderDotUS() for consideration as a fallback
    bing_geocoder   = geocoders.Bing('AvxLEwiPhVJzf0S3Pozgg01NnUQQX0RR6g9K46VPLlZ8OfZkKS-76gaPyzoV6IHI')

    path = os.path.join(settings.BASE_DIR, 'readonly', filename)
    try:
        f = open(path)
    except IOError:
        # bail out so the read loop below doesn't hit an undefined 'f'
        print_stack_trace()
        return

    rows = []
    for row in f:
        rows.append(row.replace("\r\n", "").split("\t"))

    for row in rows[1:]: # Skip the header
        try:
            coupon_pk           = int(row[1])
            is_duplicate        = True if row[3] == '1' else False
            is_inactive         = True if row[4] == '1' else False
            is_category_wrong   = True if row[5] == '1' else False
            is_location_wrong   = True if row[8] == '1' else False
            correction_needed   = is_duplicate or is_inactive or is_category_wrong or is_location_wrong

            if correction_needed:
                coupon_obj = Coupon.all_objects.get(pk=coupon_pk)
                if is_duplicate:
                    coupon_obj.is_duplicate = True
                    # print "Correction: ", coupon_pk, " is_duplicate=True" #DEBUG
                if is_inactive:
                    coupon_obj.status = 'confirmed-inactive'
                    # print "Correction: ", coupon_pk, " status=confirmed-inactive" #DEBUG
                if is_category_wrong:
                    coupon_obj.categories.clear()
                    try:
                        parent_category = Category.objects.get(ref_id_source='sqoot', name=row[6])
                        coupon_obj.categories.add(parent_category)
                        # print "Correction: ", coupon_pk, " Parent category -> ", parent_category.name #DEBUG
                    except:
                        pass

                    try:
                        child_category  = Category.objects.get(ref_id_source='sqoot', name=row[7])
                        coupon_obj.categories.add(child_category)
                        # print "Correction: ", coupon_pk, " Child category -> ", child_category.name #DEBUG
                    except:
                        pass
                if is_location_wrong:
                    location_obj = coupon_obj.merchant_location
                    address      = row[9]
                    locality     = row[10]
                    region       = row[11]
                    postal_code  = row[12]
                    spacer1      = ', ' if address != '' else ''
                    spacer2      = ' ' if locality != '' else ''
                    lookup_text  = address + spacer1 + locality + spacer2 + region

                    try:
                        place, (lat, lng) = bing_geocoder.geocode(lookup_text)
                        pnt = 'POINT({} {})'.format(lng, lat)
                        location_obj.geometry = pnt
                    except:
                        pass

                    location_obj.address     = address if address != '' else location_obj.address
                    location_obj.locality    = locality if locality != '' else location_obj.locality
                    location_obj.region      = region if region != '' else location_obj.region
                    location_obj.postal_code = postal_code if postal_code != '' else location_obj.postal_code
                    location_obj.save()
                    # print "Correction: ", coupon_pk, " Location fixed" #DEBUG
                coupon_obj.save()
        except:
            print_stack_trace()

    scrub_list_retrieved = [row[1] for row in rows[1:]] # list of original coupon pks imported from 'scrub_list.py'
    deals_to_scrub = Coupon.all_objects.filter(pk__in=scrub_list_retrieved)\
                                       .exclude(Q(status='confirmed-inactive') | Q(status='implied-inactive') | Q(is_duplicate=True))\
                                       .order_by('merchant__name')

    probably_dup_deals_list = [] # List of coupon pks that look like a duplicate.
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub, probably_dup_deals_list, 'coupon_directlink')
    probably_dup_deals_list = crosscheck_by_field(deals_to_scrub, probably_dup_deals_list, 'merchant_name')
    probably_dup_deals_list = list(set(probably_dup_deals_list))

    for pk in probably_dup_deals_list:
        try:
            coupon = Coupon.all_objects.get(pk=pk)
            coupon.is_duplicate = True
            coupon.save()
            # print "Correction: ", coupon_pk, " is_duplicate=True" #DEBUG
        except:
            print_stack_trace()
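
The column indices used in read_scrub_list_and_update above imply a tab-separated scrub list. A sample row matching those indices (the values and the unused columns are hypothetical):

# row[1]=coupon pk, row[3]=is_duplicate, row[4]=is_inactive,
# row[5]=is_category_wrong, row[6]=parent category, row[7]=child category,
# row[8]=is_location_wrong, row[9]=address, row[10]=locality,
# row[11]=region, row[12]=postal code
sample_row = ['ignored', '1234', 'ignored', '0', '1', '1', 'Food & Drink',
              'Pizza', '1', '1 Main St', 'Boston', 'MA', '02110']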
Code example #43
0
File: views.py Project: beforebeta/pennywyse
def log_click_track(request, coupon=None):
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path
        source_url_type='landing'

        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        merchant=None

        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack(visitor=visitor, user_agent=visitor.user_agent[:255], referer=referer[:255],
                                 target_url=target_url[:255], source_url_type=source_url_type[:255],
                                 source_url=source_url[:255], merchant=merchant, coupon=coupon, 
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source      = visitor.acquisition_source
            click_track.acquisition_medium      = visitor.acquisition_medium
            click_track.acquisition_term        = visitor.acquisition_term
            click_track.acquisition_content     = visitor.acquisition_content
            click_track.acquisition_campaign    = visitor.acquisition_campaign
            click_track.acquisition_gclid       = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
Code example #44
0
File: models.py Project: beforebeta/pennywyse
    def update_coupons(self):
        for coupon in self.coupons:
            try:
                coupon.update()
            except:
                print_stack_trace()
Code example #45
0
def log_click_track(request, coupon=None):
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path
        source_url_type = 'landing'

        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        merchant = None

        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack(visitor=visitor,
                                 user_agent=visitor.user_agent[:255],
                                 referer=referer[:255],
                                 target_url=target_url[:255],
                                 source_url_type=source_url_type[:255],
                                 source_url=source_url[:255],
                                 merchant=merchant,
                                 coupon=coupon,
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
Code example #47
0
File: visitor.py Project: beforebeta/pennywyse
    def process_request(self, request):
        # don't process AJAX requests
        if request.path.startswith("/s/") or request.path.startswith("/static/") or request.path.startswith("/admin/")\
             or request.path.startswith("/favicon.ico") or (request.is_ajax() and not request.path.startswith('/o/')):
            return
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                             errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' %
                          (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {'session_key': session_key, 'ip_address': ip_address}

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff)

                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' %
                              (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error(
                        'There was a problem saving visitor information:\n%s\n\n%s'
                        % (traceback.format_exc(), locals()))
            except:
                return

            request.session['visitor_id'] = visitor_id = visitor.id

        redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(
                one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(
                request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(
                        data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(
                        data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(
                        data.get('utmccn'))
                    visitor_data['keywords'] = normalize_ga_value(
                        data.get('utmctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                #update the tracking info with the latest and bump the old one to be stored in the history

                #visitor.bump_past_acquisition_info()
                past_acquisition_info = visitor_data.get(
                    'past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING.keys():
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data[
                        'past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except:
            print_stack_trace()
        redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
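
VISITOR_PARAMS_MAPPING is referenced here but never defined in these snippets. A plausible shape, inferred from the acquisition_* fields and utm_* parameters used throughout this listing (the exact mapping is an assumption):

# hypothetical: maps visitor-data keys to the request GET parameters they come from
VISITOR_PARAMS_MAPPING = {
    'acquisition_source':   'utm_source',
    'acquisition_medium':   'utm_medium',
    'acquisition_term':     'utm_term',
    'acquisition_content':  'utm_content',
    'acquisition_campaign': 'utm_campaign',
    'acquisition_gclid':    'gclid',
}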
Code example #48
0
File: image.py Project: beforebeta/pennywyse
def _get_image(user, image_url, specific_height=-1, specific_width=-1):
    """
        Returns the image at the image_url of the specific height and width
        if image is not locally buffers, then it downloads the original image at the url
        if specific height and width are set, it also creates a resized version of the image
    """
    def _download_image_to_local(src_image_url, src_image_pointer, height,
                                 width):
        prefix, ext = os.path.splitext(urlparse.urlparse(src_image_url).path)
        prefix = prefix.replace('/', '').replace('\\', '')
        prefix = url2path(prefix)

        filename = '%s_%s%s' % (uuid.uuid4().hex, uuid.uuid4().hex, ext)
        local_copy = os.path.join(settings.IMAGE_LOCAL_COPY_DIR, filename)
        local_url = os.path.join(settings.IMAGE_LOCAL_COPY_DIR_NO_PREFIX,
                                 filename)
        file_saved_path = local_copy

        if height == -1:
            #ensure exists
            assert src_image_pointer.status_code == 200

            #ensure is image!
            content_type = src_image_pointer.headers['content-type'].lower()

            if content_type[:5] != 'image':
                #cloudfront servers typically hosting images as octet-streams
                #we need to handle that
                assert 'application/octet-stream' in content_type
                #also in these cases we don't allow files greater than 700KB -> 716800 = 700*1024
                assert int(src_image_pointer.headers.get('content-length', 0)) < 716800

            if src_image_pointer.status_code == 200:
                #download remote file
                file_saved_path = local_copy
                local_copy = open(local_copy, 'wb')
                local_copy.write(src_image_pointer.content)
                local_copy.close()
            else:
                raise Http404()
        else:
            img_util.resize(src_image_pointer,
                            (specific_width, specific_height), True,
                            local_copy)


        # s3_url = s3.upload(file_saved_path)

        # store reference in imagestore
        # img = ImageStore(remote_url=image_url, local_url=s3_url, source_user=user, height=height, width=width)
        img = ImageStore(remote_url=image_url,
                         local_url="/%s" % local_url,
                         source_user=user,
                         height=height,
                         width=width)
        img.save()
        return img

    def _download_temp_image(src_image_url, src_image_pointer, height, width):
        prefix, ext = os.path.splitext(urlparse.urlparse(src_image_url).path)
        prefix = prefix.replace('/', '').replace('\\', '')
        prefix = url2path(prefix)

        filename = '%s_%s%s' % (uuid.uuid4().hex, uuid.uuid4().hex, ext)
        local_copy = os.path.join(settings.IMAGE_LOCAL_COPY_DIR, filename)
        local_url = os.path.join(settings.IMAGE_LOCAL_COPY_DIR_NO_PREFIX,
                                 filename)

        if height == -1:
            #ensure exists
            assert src_image_pointer.status_code == 200

            #ensure is image!
            content_type = src_image_pointer.headers['content-type'].lower()

            if content_type[:5] != 'image':
                #cloudfront servers typically hosting images as octet-streams
                #we need to handle that
                assert 'application/octet-stream' in content_type
                #also in these cases we don't allow files greater than 700KB -> 716800 = 700*1024
                assert int(src_image_pointer.headers.get('content-length', 0)) < 716800

            if src_image_pointer.status_code == 200:
                #download remote file
                local_copy = open(local_copy, 'wb')
                local_copy.write(src_image_pointer.content)
                local_copy.close()
            else:
                raise Http404()

        return local_url

    if user.is_anonymous() or not (user.is_authenticated()):
        try:
            user = User.objects.get(username=IMAGE_ANONYMOUS_USER)
        except:
            print_stack_trace()
            user = User.objects.create_user(
                username=IMAGE_ANONYMOUS_USER,
                email='*****@*****.**',
                password=IMAGE_ANONYMOUS_USER)

    image_url = urllib.unquote_plus(image_url)

    original_image = None

    #check if image already exists
    image_url_to_check = image_url
    if ShortenedURL.objects.should_shorten_url(image_url):
        image_url_to_check = ShortenedURL.objects.shorten_url(
            image_url).shortened_url

    for image in ImageStore.objects.filter(remote_url=image_url_to_check):
        if image.height == specific_height and image.width == specific_width:
            # found the image with the exact dimensions
            return image
        if image.height == image.width == -1:
            #found the original image
            original_image = image

    if not original_image:
        #original image not available
        #download original image
        original_image = _download_image_to_local(image_url,
                                                  requests.get(image_url), -1,
                                                  -1)
    else:
        original_image.local_url = _download_temp_image(
            image_url, requests.get(image_url), -1, -1)

    if specific_height == specific_width == -1:
        #        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, original_image.local_url[1:].split('/')[-1]))
        return original_image
    else:
        #required image is not available
        #resize original image to required image
        resized_image = _download_image_to_local(
            image_url,
            os.path.join(settings.IMAGE_LOCAL_COPY_DIR,
                         original_image.local_url[1:].split('/')[-1]),
            specific_height, specific_width)
        #        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, resized_image.local_url[1:].split('/')[-1]))
        #        os.remove(os.path.join(settings.IMAGE_LOCAL_COPY_DIR, original_image.local_url[1:].split('/')[-1]))
        return resized_image
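
_get_image delegates the actual resizing to img_util.resize, which is not shown. A minimal standalone sketch of the same download-and-resize flow using requests and PIL directly (URL and output path are hypothetical):

import requests
from StringIO import StringIO
from PIL import Image

resp = requests.get('http://example.com/logo.png')
assert resp.status_code == 200
assert resp.headers['content-type'].lower().startswith('image')

img = Image.open(StringIO(resp.content))
img.thumbnail((100, 100), Image.ANTIALIAS)  # resize in place, keeping aspect ratio
img.save('/tmp/logo_100x100.png')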
Code example #49
0
def crosscheck_by_field(deals_to_dedup, field_name):
    from core.signals import update_object
    duplicate_deals_list = []  # List of duplicate coupon pks.

    if field_name == 'coupon_directlink':
        field_list = list(set([d.directlink for d in deals_to_dedup]))
    elif field_name == 'merchant_name':
        field_list = list(set([d.merchant.name for d in deals_to_dedup]))
    else:
        return

    all_active_deals = len(deals_to_dedup)
    num_of_unique_fields = len(field_list)
    try:
        print "\n...Detected {} deals by '{}' field to dedup out of {} total active deals".format(
            num_of_unique_fields, field_name, all_active_deals), show_time()
    except:
        pass

    progress_count = 1
    clear_cache_timer = 1
    for x in field_list:
        try:
            same_looking_deals = Coupon.all_objects.filter(ref_id_source='sqoot', is_duplicate=False,
                                                           is_deleted=False, online=False)\
                                                   .exclude(end__lt=datetime.now(pytz.utc))
            if field_name == 'coupon_directlink':
                same_looking_deals = same_looking_deals.filter(directlink=x)
            elif field_name == 'merchant_name':
                same_looking_deals = same_looking_deals.filter(
                    merchant__name__contains=x)

            if same_looking_deals.count() <= 1:
                print show_time(), '({}/{}) DEDUP-HARD:'.format(
                    progress_count,
                    num_of_unique_fields), '...no duplicate, skipping...'
                progress_count += 1
                clear_cache_timer += 1
                continue

            try:
                print show_time(), '({}/{}) DEDUP-HARD:'.format(
                    progress_count,
                    num_of_unique_fields), 'all deals with {}=={}'.format(
                        field_name, x)
            except:
                pass

            while True:
                current_count = same_looking_deals.count()
                if current_count == 1:
                    break
                else:
                    for c in same_looking_deals[1:current_count]:
                        if c.is_duplicate or (c.pk in duplicate_deals_list):
                            continue

                        does_it_look_duplicate, which_deal = compare_location_between(
                            same_looking_deals[0], c)
                        if not does_it_look_duplicate:
                            continue

                        if which_deal == same_looking_deals[0]:
                            duplicate_deals_list.append(which_deal.pk)
                            break
                        else:
                            duplicate_deals_list.append(which_deal.pk)
                    same_looking_deals = same_looking_deals.exclude(
                        pk=same_looking_deals[0].pk)
            progress_count += 1
            clear_cache_timer += 1
            if clear_cache_timer >= 100:
                duplicate_deals_list = list(set(duplicate_deals_list))
                Coupon.all_objects.filter(pk__in=duplicate_deals_list).update(
                    is_duplicate=True)
                for coupon in Coupon.all_objects.filter(
                        pk__in=duplicate_deals_list):
                    handle_exceptions(
                        update_object.send(sender=Coupon, instance=coupon))
                    print 'Updated %s' % coupon.id
                duplicate_deals_list = []
                clear_cache_timer = 1
        except:
            try:
                print "!!!ERROR: field: {}".format(x)
            except:
                pass
            print_stack_trace()
Code example #50
0
File: views.py Project: beforebeta/pennywyse
def click_track(request, clicked_link_path=None):
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link=clicked_link.lower()
        except:
            print_stack_trace()

        source_url_type='landing'
        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        coupon=None
        merchant=None

        if "/coupon/" in clicked_link:
            #skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2] #assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track                 = ClickTrack()
        click_track.visitor         = visitor
        click_track.user_agent      = visitor.user_agent[:255]
        click_track.referer         = referer[:255]
        click_track.target_url      = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url      = source_url[:255]
        click_track.merchant        = merchant
        click_track.coupon          = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source      = visitor.acquisition_source
            click_track.acquisition_medium      = visitor.acquisition_medium
            click_track.acquisition_term        = visitor.acquisition_term
            click_track.acquisition_content     = visitor.acquisition_content
            click_track.acquisition_campaign    = visitor.acquisition_campaign
            click_track.acquisition_gclid       = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
    return success()
Code example #51
0
File: sqootutils.py Project: beforebeta/pennywyse
def crosscheck_by_field(deals_to_dedup, field_name):
    from core.signals import update_object
    duplicate_deals_list = [] # List of duplicate coupon pks.

    if field_name == 'coupon_directlink':
        field_list = list(set([d.directlink for d in deals_to_dedup]))
    elif field_name == 'merchant_name':
        field_list = list(set([d.merchant.name for d in deals_to_dedup]))
    else:
        return

    all_active_deals = len(deals_to_dedup)
    num_of_unique_fields = len(field_list)
    try:
        print "\n...Detected {} deals by '{}' field to dedup out of {} total active deals".format(num_of_unique_fields, field_name, all_active_deals), show_time()
    except:
        pass

    progress_count = 1
    clear_cache_timer = 1
    for x in field_list:
        try:
            same_looking_deals = Coupon.all_objects.filter(ref_id_source='sqoot', is_duplicate=False,
                                                           is_deleted=False, online=False)\
                                                   .exclude(end__lt=datetime.now(pytz.utc))
            if field_name == 'coupon_directlink':
                same_looking_deals = same_looking_deals.filter(directlink=x)
            elif field_name == 'merchant_name':
                same_looking_deals = same_looking_deals.filter(merchant__name__contains=x)

            if same_looking_deals.count() <= 1:
                print show_time(), '({}/{}) DEDUP-HARD:'.format(progress_count, num_of_unique_fields), '...no duplicate, skipping...'
                progress_count += 1
                clear_cache_timer += 1
                continue

            try:
                print show_time(), '({}/{}) DEDUP-HARD:'.format(progress_count, num_of_unique_fields), 'all deals with {}=={}'.format(field_name, x)
            except:
                pass

            while True:
                current_count = same_looking_deals.count()
                if current_count == 1:
                    break
                else:
                    for c in same_looking_deals[1:current_count]:
                        if c.is_duplicate or (c.pk in duplicate_deals_list):
                            continue

                        does_it_look_duplicate, which_deal = compare_location_between(same_looking_deals[0], c)
                        if not does_it_look_duplicate:
                            continue

                        if which_deal == same_looking_deals[0]:
                            duplicate_deals_list.append(which_deal.pk)
                            break
                        else:
                            duplicate_deals_list.append(which_deal.pk)
                    same_looking_deals = same_looking_deals.exclude(pk=same_looking_deals[0].pk)
            progress_count += 1
            clear_cache_timer += 1
            if clear_cache_timer >= 100:
                duplicate_deals_list = list(set(duplicate_deals_list))
                Coupon.all_objects.filter(pk__in=duplicate_deals_list).update(is_duplicate=True)
                for coupon in Coupon.all_objects.filter(pk__in=duplicate_deals_list):
                    handle_exceptions(update_object.send(sender=Coupon, instance=coupon))
                    print 'Updated %s' % coupon.id
                duplicate_deals_list = []
                clear_cache_timer = 1
        except:
            try:
                print "!!!ERROR: field: {}".format(x)
            except:
                pass
            print_stack_trace()
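
At its core, crosscheck_by_field groups deals that share a field value and flags all but one of each group as duplicates; compare_location_between (not shown) decides which one survives. A toy illustration of the grouping step alone:

deals = [
    {'pk': 1, 'directlink': 'http://a.example/deal'},
    {'pk': 2, 'directlink': 'http://a.example/deal'},
    {'pk': 3, 'directlink': 'http://b.example/deal'},
]
seen = set()
duplicate_deals_list = []
for d in deals:
    if d['directlink'] in seen:
        # already saw a deal with this directlink: flag this one
        duplicate_deals_list.append(d['pk'])
    else:
        seen.add(d['directlink'])
print duplicate_deals_list  # [2]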