Esempio n. 1
0
    def handle(self, cmd: UpdateRankings) -> RankingsUpdated:
        """Recompute and store the project rankings described by ``cmd``.

        The rows returned by ``self._query_database_num_ratings(cmd)`` are
        loaded into a DataFrame, tagged with the command's language, period
        and date range, and ranked on the DataFrame's second column (highest
        value gets rank 1; ties share the lowest rank of their group).
        Rankings already stored for the same language, end date and period
        are deleted before the new rows are bulk-inserted.

        Because the body contains ``yield``, calling this method returns a
        generator: validation and all database work happen only once it is
        iterated. It yields a single ``RankingsUpdated`` event.

        Raises:
            ValueError: If ``cmd.language`` is empty, or if ``cmd.start_date``
                is set and is later than ``cmd.end_date``.
        """
        if not cmd.language:
            raise ValueError("Language must not be empty")
        if cmd.start_date and cmd.start_date > cmd.end_date:
            raise ValueError("Invalid dates")

        rankings = self._query_database_num_ratings(cmd)

        # Use pandas to perform the ranking
        df = pd.DataFrame(rankings)
        df["language"] = cmd.language
        df["period"] = cmd.period
        df["start_date"] = cmd.start_date
        df["end_date"] = cmd.end_date

        # Rank on the second column (presumably the rating count -- confirm
        # against _query_database_num_ratings). The builtin ``int`` replaces
        # the ``np.int`` alias, which was removed in NumPy 1.24.
        score_column = df.columns[1]
        df["rank"] = (
            df[score_column]
            .rank(axis=0, method="min", ascending=False)
            .astype(int)
        )

        # Delete existing rankings prior to insert
        query = (
            Ranking.query.filter(Ranking.language == cmd.language)
            .filter(Ranking.end_date == cmd.end_date)
            .filter(Ranking.period == cmd.period)
        )
        query.delete()

        # index=False: only the DataFrame columns are exported, in the same
        # order as the column names passed alongside them.
        data = iter(df.to_records(index=False))
        batch = from_sqlalchemy_table(Ranking.__table__, data, list(df.columns))
        batch.execute(db.engine.raw_connection)

        yield RankingsUpdated(cmd.period, cmd.start_date, cmd.end_date, cmd.language)
Esempio n. 2
0
    def handle(self, cmd: ExecuteMahoutRecommender):
        """Run the Mahout recommender for a model and reload its results.

        Looks up the model named by ``cmd.model`` in ``MODELS``, runs the
        Maven ``exec:java`` goal in ``../growser-mahout/`` with the model's
        source and destination paths, deletes the recommendations currently
        stored for that model, and bulk-loads the rows read from the
        destination file.

        Returns:
            A ``RecommendationsUpdated`` event built from the model id and
            the batch object.

        Raises:
            ValueError: If ``cmd.model`` is not a key of ``MODELS``.
            subprocess.CalledProcessError: If the Maven process exits with a
                non-zero status. Nothing is deleted or imported in that case.
        """
        model = MODELS.get(cmd.model)
        if model is None:
            # Fail with a clear message instead of an AttributeError below.
            raise ValueError("Unknown model: {!r}".format(cmd.model))

        source = abspath(join(RATINGS_PATH, model.source))
        destination = abspath(join(EXPORT_PATH, model.destination))

        log.info('Running Mahout')
        run = ["mvn", "exec:java", "-DbatchSize=100",
               "-DmodelID={}".format(model.id),
               "-Dsrc=" + source,
               "-Dout=" + destination]
        # check_call (not call): a failed run must abort here, otherwise the
        # existing recommendations would be deleted and replaced with
        # whatever stale or partial output sits at the destination path.
        subprocess.check_call(run, cwd="../growser-mahout/")

        Recommendation.query.filter(
            Recommendation.model_id == model.id).delete()

        columns = ['model_id', 'repo_id', 'recommended_repo_id', 'score']
        batch = from_sqlalchemy_table(
            Recommendation.__table__, from_csv(destination), columns)

        for rows in batch.batch_execute(db.engine.raw_connection):
            log.info("Batch complete: {}".format(rows))

        return RecommendationsUpdated(model.id, batch)
Esempio n. 3
0
 def test_errors_invalid_type(self):
     """Passing ``None`` as the table must raise ``TypeError``."""
     invalid_table = None
     with self.assertRaises(TypeError):
         from_sqlalchemy_table(invalid_table, get_fake_rows(5), columns)
Esempio n. 4
0
    def test_sqlalchemy_table(self):
        """The batch built for ``TestTable`` exposes the requested columns in order."""
        batch = from_sqlalchemy_table(TestTable, get_fake_rows(5), columns)
        column_names = [column.name for column in batch.columns]
        assert columns == column_names