def mlRefreshViewFullPlacementsDataCron():
    """Refresh the ``ml_view_full_placements_data`` materialized view.

    A ``LastModified`` row of type ``mlRefreshViewFullPlacementsDataCron``
    acts as a lock. If such a row exists and is younger than 15 minutes the
    job prints a "busy" message and returns; an older row is deleted and a
    fresh one is written. The row is deleted again once the refresh has
    finished or failed.

    Returns:
        None when the job is busy or the refresh succeeded, 1 when any
        exception was raised.
    """
    job = 'mlRefreshViewFullPlacementsDataCron'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("mlRefreshViewFullPlacementsDataCron is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()

        acquired_at = timezone.make_aware(datetime.datetime.now(),
                                          timezone.get_default_timezone())
        LastModified(type=job, date=acquired_at).save()

        with connection.cursor() as cursor:
            cursor.execute(
                "REFRESH MATERIALIZED VIEW ml_view_full_placements_data")

        LastModified.objects.filter(type=job).delete()
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print("Can't update view" + str(e))
        return 1
# Example no. 2
# 0
def suspend_state_middleware_cron():
    """Run ``PlacementState.suspend_state_middleware`` under a lock row.

    A ``LastModified`` row of type ``suspend_state_middleware_cron`` acts as
    a lock. If such a row exists and is younger than 15 minutes the job
    prints a "busy" message and returns None; an older row is deleted and a
    fresh one is written. The row is deleted again once the work has
    finished or failed. Any exception is caught and printed.
    """
    job = 'suspend_state_middleware_cron'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("suspend_state_middleware_cron is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()

        acquired_at = timezone.make_aware(datetime.datetime.now(),
                                          timezone.get_default_timezone())
        LastModified(type=job, date=acquired_at).save()

        PlacementState(None, None).suspend_state_middleware()

        LastModified.objects.filter(type=job).delete()
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print('Cron job - suspend_state_middleware_cron Error: ' + str(e))
# Example no. 3
# 0
def change_state_placement_by_cron_settings():
    """Run ``PlacementState.change_state_placement_by_cron`` under a lock row.

    A ``LastModified`` row of type ``change_state_placement_by_cron_settings``
    acts as a lock. If such a row exists and is younger than 15 minutes the
    job prints a "busy" message and returns None; an older row is deleted
    and a fresh one is written. The row is deleted again once the work has
    finished or failed. Any exception is caught and printed.
    """
    job = 'change_state_placement_by_cron_settings'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("change_state_placement_by_cron_settings is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()

        acquired_at = timezone.make_aware(datetime.datetime.now(),
                                          timezone.get_default_timezone())
        LastModified(type=job, date=acquired_at).save()

        PlacementState(None, None).change_state_placement_by_cron()

        LastModified.objects.filter(type=job).delete()
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print('Cron job - change_state_placement_by_cron_settings Error: ' +
              str(e))
def _mlPredictNewPlacementsTouchLock():
    """Stamp the job's lock row with the current time so it does not go stale."""
    LastModified.objects.filter(type='mlPredictNewPlacementsCron').update(
        date=timezone.make_aware(datetime.datetime.now(),
                                 timezone.get_default_timezone()))


def _mlPredictNewPlacementsKmeans(advertiserType, label, impsBorder):
    """Run ``mlPredictKmeans`` for every not-yet-clustered placement.

    Args:
        advertiserType: value matched against ``advertiser.ad_type`` and
            ``ml_placements_clusters_kmeans.adv_type`` and passed on to the
            predictor.
        label: human-readable name used only in the printed messages.
        impsBorder: minimum ``SUM(imps)`` a placement needs to be selected.
    """
    if mlGetGoodClusters("ctr_cvr_cpc_cpm_cpa", advertiserType) == -1:
        print("K-means " + label + " model is not taught")
        return

    # Placements with data on all 7 weekdays and enough impressions that
    # have no test_number 2 cluster row for this advertiser type yet.
    newPlacementsList = NetworkAnalyticsReport_ByPlacement.objects.raw(
        """SELECT
              t1.placement_id as id
            FROM
              network_analytics_report_by_placement t1
              left join ml_placements_clusters_kmeans t2 on t2.placement_id = t1.placement_id and test_number = 2 and t2.adv_type = %s
              join campaign
              on t1.campaign_id = campaign.id
              join advertiser
                on advertiser.id=campaign.advertiser_id
            where t2.placement_id is null and advertiser.ad_type = %s
            GROUP BY
              t1.placement_id
            HAVING
              COUNT(DISTINCT extract ( dow from t1.hour)) = 7 and SUM(t1.imps) >= %s""",
        params=[advertiserType, advertiserType, impsBorder])

    recognized = 0
    _mlPredictNewPlacementsTouchLock()
    for row in newPlacementsList:
        # Heartbeat per placement: a long run must not look abandoned.
        _mlPredictNewPlacementsTouchLock()
        mlPredictKmeans(row.id,
                        "ctr_cvr_cpc_cpm_cpa",
                        advertiser_type=advertiserType)
        recognized += 1

    print("K-means " + label + " placements recognized: " + str(recognized))


def _mlPredictNewPlacementsLogreg(impsBorder):
    """Run ``mlPredictLogisticRegression`` for every placement without a result.

    Args:
        impsBorder: minimum ``SUM(imps)`` a placement needs to be selected.
    """
    recognized = 0
    coefficients = MLLogisticRegressionCoeff.objects.filter(day=7,
                                                            test_number=3)
    if not coefficients:
        print("Logistic regression model is not taught")
        return

    # Placements with data on all 7 weekdays and enough impressions that
    # have no test_number 3 result with probability != -1 yet.
    newPlacementsList = NetworkAnalyticsReport_ByPlacement.objects.raw(
        """
        SELECT
          t1.placement_id as id
        FROM
          network_analytics_report_by_placement t1
          left join ml_logistic_regression_results t2 on t2.placement_id = t1.placement_id and test_number = 3 and probability != -1
        WHERE
          t2.placement_id is null
        GROUP BY
          t1.placement_id
        HAVING
          COUNT(DISTINCT extract (dow from t1.hour)) = 7 and SUM(t1.imps) >= %s""",
        params=[impsBorder])

    _mlPredictNewPlacementsTouchLock()
    for row in newPlacementsList:
        _mlPredictNewPlacementsTouchLock()
        mlPredictLogisticRegression(row.id, "ctr_cvr_cpc_cpm_cpa")
        recognized += 1
    print("Logistic regression placements recognized: " + str(recognized))


def mlPredictNewPlacementsCron():
    """Predict clusters / probabilities for placements that have none yet.

    Runs, in order, the K-means prediction for ``ecommerceAd`` advertisers,
    the K-means prediction for ``leadGenerationAd`` advertisers and the
    logistic-regression prediction, each over placements selected with an
    impressions threshold of 1000.

    A ``LastModified`` row of type ``mlPredictNewPlacementsCron`` acts as a
    lock. If such a row exists and is younger than 15 minutes the job prints
    a "busy" message and returns None; an older row is deleted and a fresh
    one is written. The row's timestamp is refreshed while placements are
    processed and the row is deleted when the job finishes or fails. Any
    exception is caught and printed.
    """
    job = 'mlPredictNewPlacementsCron'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("mlPredictNewPlacementsCron is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()
        LastModified(type=job,
                     date=timezone.make_aware(
                         datetime.datetime.now(),
                         timezone.get_default_timezone())).save()

        impsBorder = 1000
        _mlPredictNewPlacementsKmeans("ecommerceAd", "eCommerce", impsBorder)
        _mlPredictNewPlacementsKmeans("leadGenerationAd", "lead-generation",
                                      impsBorder)
        _mlPredictNewPlacementsLogreg(impsBorder)

        print("New placements prediction completed")
        LastModified.objects.filter(type=job).delete()
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print('Cron job - mlPredictNewPlacementsCron Error: ' + str(e))
# Example no. 5
# 0
def mlChangeCampaignCpmCron():
    """Predict a CPM for each stored campaign model and push it to AppNexus.

    For every ``MLVideoAdCampaignsModels`` row whose model file exists, the
    pickled predictor is loaded, fed with the features returned by
    ``getDataWindow`` and its (non-negative) answer is turned into a CPM:
    ``(answer / 100) * data[0][7]``. The CPM is written to the campaign's
    ``base_bid`` through the AppNexus campaign API and, unless the response
    carries ``response.error``, stored as an ``MLVideoAdCampaignsResults``
    row.

    A ``LastModified`` row of type ``mlChangeCampaignCpmCron`` acts as a
    lock. If such a row exists and is younger than 15 minutes the job prints
    a "busy" message and returns None; an older row is deleted and a fresh
    one is written. The row's timestamp is refreshed after each stored
    result and the row is deleted when the job finishes or fails. Any
    exception is caught and printed.
    """
    job = 'mlChangeCampaignCpmCron'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("mlChangeCampaignCpmCron is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()
        LastModified(
            type=job,
            date=timezone.make_aware(datetime.now(),
                                     timezone.get_default_timezone())).save()

        # Position in this list is the numeric algorithm type that
        # getDataWindow receives.
        algoVocabl = ["gradient", "random_forest", "abtree"]

        for row in MLVideoAdCampaignsModels.objects.all():
            if not isfile(row.path):
                print("File " + str(row.path) + " is not exist")
                continue
            predictor = joblib.load(row.path)

            # loading data
            data = getMLCpmData(campaignId=row.campaign_id, windowSize=12)
            data, features, _ = getDataWindow(
                type=algoVocabl.index(row.type),
                data=data,
                windowSize=12,
            )
            # NOTE(review): presumably getDataWindow puts -1 in the first
            # slot when there is too little data - confirm against it.
            if data == -1:
                print("Can not predict CPM with that amount of the data for the campaign " + str(
                    row.campaign_id))
                continue

            # getting answer from the predictor (negative answers clamp to 0)
            ans = predictor.predict(features)[0]
            if ans < 0:
                ans = 0
            cpm = (ans / 100.0) * float(data[0][7])

            # send to appnexus
            campInfo = getCampaignById(row.campaign_id)
            campInfo["base_bid"] = float(cpm)
            campInfo["cpm_bid_type"] = "base"
            url = 'https://api.appnexus.com/campaign?id=' + str(
                row.campaign_id) + "&advertiser_id=" + str(
                    campInfo["advertiser_id"])
            headers = {
                "Authorization": getToken(),
                'Content-Type': 'application/json'
            }
            # Separate name: the data window in `data` must not be clobbered.
            payload = json.dumps({"campaign": campInfo})
            apnxResponse = json.loads(
                requests.put(url=url, headers=headers, data=payload).content)

            # The response reports a failure through response.error; only a
            # missing key (or a non-dict level) means the update succeeded.
            try:
                apnxResponse['response']['error']
            except (KeyError, TypeError):
                pass
            else:
                print("Appnexus error on the changing of CPM for campaign " + str(
                    row.campaign_id))
                continue

            # save in db
            MLVideoAdCampaignsResults(advertiser_id=row.advertiser_id,
                                      campaign_id=row.campaign_id,
                                      res_date=timezone.make_aware(
                                          datetime.now(),
                                          timezone.get_default_timezone()),
                                      type=row.type,
                                      fill_rate=ans,
                                      cpm=cpm,
                                      profit_loss=None).save()
            # Heartbeat so the lock is not treated as stale mid-run.
            LastModified.objects.filter(type=job) \
                .update(date=timezone.make_aware(datetime.now(), timezone.get_default_timezone()))

        LastModified.objects.filter(type=job).delete()
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print('Cron job - mlChangeCampaignCpmCron Error: ' + str(e))
# Example no. 6
# 0
def mlRefreshAlgoListCron():
    """Fit and store CPM-prediction models for video-ad campaigns.

    For every campaign of every ``videoAds`` advertiser the last 30 days of
    data are loaded with ``getMLCpmData``. Campaigns with fewer than 264
    data rows are skipped. For each of the three regressors (gradient
    boosting, random forest, AdaBoost over a decision tree) whose pickle
    file does not exist yet, the model is fitted, dumped under
    ``rtb/res/prediction_models_cpm/`` and registered through
    ``MLVideoAdCampaignsModelsInfo.objects.update_or_create``.

    A ``LastModified`` row of type ``mlRefreshAlgoListCron`` acts as a lock.
    If such a row exists and is younger than 15 minutes the job prints a
    "busy" message and returns None; an older row is deleted and a fresh one
    is written. The row's timestamp is refreshed once per campaign and the
    row is deleted when the job finishes or fails. Any exception is caught
    and printed.
    """
    job = 'mlRefreshAlgoListCron'
    try:
        locks = LastModified.objects.filter(type=job)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("mlRefreshAlgoListCron is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=job).delete()
        LastModified(
            type=job,
            date=timezone.make_aware(datetime.now(),
                                     timezone.get_default_timezone())).save()

        print("Start of the campaigns CPM prediction learning")
        # Parallel lists: algoVocabl[k] names the estimator in algoList[k].
        algoVocabl = ["gradient", "random_forest", "abtree"]
        algoList = [
            GradientBoostingRegressor(),
            RandomForestRegressor(n_estimators=200, max_depth=10),
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=10),
                              n_estimators=500,
                              random_state=np.random.RandomState(1)),
        ]

        for advertiser in Advertiser.objects.filter(ad_type="videoAds"):
            for campaign in Campaign.objects.filter(
                    advertiser_id=advertiser.id):
                # filling campaign data
                data = getMLCpmData(campaignId=campaign.id,
                                    fromDate=datetime.now() -
                                    timedelta(days=30),
                                    toDate=datetime.now())

                # Heartbeat so the lock is not treated as stale mid-run.
                LastModified.objects.filter(type=job).update(
                    date=timezone.make_aware(
                        datetime.now(), timezone.get_default_timezone()))

                # check of the data for learning amount
                if len(data) < 264:
                    print("Not enough data for the campaign " + str(
                        campaign.id))
                    continue

                # models learning
                for algoIndex, (algoName, model) in enumerate(
                        zip(algoVocabl, algoList)):
                    modelPath = ("rtb/res/prediction_models_cpm/" +
                                 str(campaign.id) + "_" + str(algoName) +
                                 ".pkl")
                    # A model that is already on disk is not retrained.
                    if isfile(modelPath):
                        continue
                    _window, learnFeatures, learnResults = getDataWindow(
                        algoIndex, data, 12)
                    model.fit(learnFeatures, learnResults)
                    # TODO: make a check for getting a better algo
                    joblib.dump(model, modelPath, compress=1)
                    MLVideoAdCampaignsModelsInfo.objects.update_or_create(
                        campaign_id=campaign.id,
                        type=algoName,
                        defaults={
                            "path": modelPath,
                            "start": timezone.make_aware(
                                data[0][6],
                                timezone.get_default_timezone()),
                            "finish": timezone.make_aware(
                                data[len(data) - 1][6],
                                timezone.get_default_timezone()),
                            "evaluation_date": timezone.make_aware(
                                datetime.now(),
                                timezone.get_default_timezone()),
                        })

        LastModified.objects.filter(type=job).delete()
        print("Campaigns cpm prediction learning finished")
    except Exception as e:
        # Release the lock before reporting the failure.
        LastModified.objects.filter(type=job).delete()
        print('Cron job - mlRefreshAlgoListCron Error: ' + str(e))
# Example no. 7
# 0
def _impTrackerSyncOne(trackerModel, fallbackStart, trackerKind, errorPrefix,
                       stepMinutes, lockType):
    """Feed ``impTracker`` with consecutive time windows for one tracker.

    The first window starts one second after the newest ``Date`` stored in
    ``trackerModel`` (or at ``fallbackStart`` when the table is empty) and
    windows are cut at the points yielded by ``perdelta`` up to now. Errors
    raised while walking the windows are printed with ``errorPrefix`` and
    swallowed, so the caller can continue with the next tracker.

    Args:
        trackerModel: model class whose ``Date`` column is aggregated.
        fallbackStart: ``'%Y-%m-%d %H:%M:%S'`` string used for an empty table.
        trackerKind: third argument handed to ``impTracker``.
        errorPrefix: text printed in front of a caught error.
        stepMinutes: window length in minutes handed to ``perdelta``.
        lockType: ``LastModified.type`` of the lock row to keep fresh.
    """
    stamp = '%Y-%m-%d %H:%M:%S'
    newest = trackerModel.objects.aggregate(Max("Date"))["Date__max"]
    if newest is None:
        start = fallbackStart
    else:
        start = (newest + timedelta(seconds=1)).strftime(stamp)
    try:
        # Round-trip through the string form on purpose: it yields a naive,
        # whole-second datetime to walk from.
        start_date = datetime.strptime(start, stamp)
        end_date = datetime.now()
        windowStart = ''
        for tick in perdelta(start_date, end_date,
                             timedelta(minutes=stepMinutes)):
            if windowStart == '':
                # The first tick only opens the first window.
                windowStart = tick.strftime(stamp)
                continue
            windowEnd = tick.strftime(stamp) + ".999999999"
            # Heartbeat per window so the lock is not treated as stale.
            LastModified.objects.filter(type=lockType).update(
                date=timezone.make_aware(datetime.now(),
                                         timezone.get_default_timezone()))
            impTracker(windowStart, windowEnd, trackerKind)
            windowStart = (tick + timedelta(seconds=1)).strftime(stamp)
    except Exception as e:
        print(errorPrefix + str(e))


def get():
    """Pull new tracker data and refresh the precalculated UI data.

    Processes the Domain, Impression, Click, Conversion and AdStart
    trackers in that order, in 6-minute windows, then calls
    ``refreshPrecalculatedDataTrackerCron``. A failure inside one tracker is
    printed and does not stop the following ones. Only the first three
    trackers print a completion line.

    A ``LastModified`` row of type ``get_data_from_impression_tracker`` acts
    as a lock. If such a row exists and is younger than 15 minutes the job
    prints a "busy" message and returns None; an older row is deleted and a
    fresh one is written. The row is deleted when the job finishes. A
    ``ValueError`` is printed and swallowed; any other exception propagates
    to the caller, but the lock row is deleted first so that the next run is
    not blocked until the lock goes stale.
    """
    lockType = 'get_data_from_impression_tracker'
    try:
        # Verbtimedelta - length of one processing window, in minutes
        Verbtimedelta = 6

        locks = LastModified.objects.filter(type=lockType)
        if len(locks) >= 1:
            now = timezone.make_aware(datetime.now(),
                                      timezone.get_default_timezone())
            if now - locks[0].date < timedelta(minutes=15):
                print("get_data_from_impression_tracker is busy, wait...")
                return None
            # The existing lock is at least 15 minutes old: discard it.
            LastModified.objects.filter(type=lockType).delete()

        LastModified(type=lockType,
                     date=timezone.make_aware(
                         datetime.now(),
                         timezone.get_default_timezone())).save()

        # (model, start for an empty table, impTracker kind,
        #  start message, error prefix, completion message or None)
        trackers = (
            (RtbDomainTracker, '2017-01-18 12:00:00', 'Domain',
             'Domain start', 'Domain Error: ', 'Domain completed'),
            (RtbImpressionTracker, '2016-09-01 00:00:00', 'Impression',
             'Impression start', 'Impression Error: ',
             'Impression completed'),
            (RtbClickTracker, '2016-09-01 00:00:00', 'Click',
             'Click start', 'Click Error: ', 'Click completed'),
            (RtbConversionTracker, '2016-09-01 00:00:00', 'Conversion',
             'Conversion start', ' Conversion Error: ', None),
            (RtbAdStartTracker, '2016-09-01 00:00:00', 'AdStart',
             'Ad-start start', ' Ad-start Error: ', None),
        )
        for (trackerModel, fallbackStart, trackerKind, startMessage,
             errorPrefix, doneMessage) in trackers:
            print(startMessage)
            _impTrackerSyncOne(trackerModel, fallbackStart, trackerKind,
                               errorPrefix, Verbtimedelta, lockType)
            if doneMessage is not None:
                print(doneMessage)

        # refresh data for the UI (grids and graphs)
        refreshPrecalculatedDataTrackerCron()

        LastModified.objects.filter(type=lockType).delete()
    except ValueError as e:
        LastModified.objects.filter(type=lockType).delete()
        print('Error: ' + str(e))
    except Exception:
        # Not handled here, but the lock must not outlive the failed run.
        LastModified.objects.filter(type=lockType).delete()
        raise