def handle(self, cmd: UpdateRankings) -> RankingsUpdated:
    """Recompute and store the project rankings described by ``cmd``.

    The body contains ``yield``, so this method is a generator: nothing
    below (validation included) runs until the caller iterates the result.
    NOTE(review): the annotation advertises a plain ``RankingsUpdated``;
    confirm the dispatcher iterates handler results.

    :param cmd: Supplies ``language``, ``period``, ``start_date`` and
        ``end_date`` for the ranking window.
    :raises ValueError: If ``cmd.language`` is empty, or if
        ``cmd.start_date`` is set and is later than ``cmd.end_date``.
    :returns: Yields a single ``RankingsUpdated`` event once the new rows
        have been handed to the bulk insert.
    """
    if not cmd.language:
        raise ValueError("Language must not be empty")

    if cmd.start_date and cmd.start_date > cmd.end_date:
        raise ValueError("Invalid dates")

    rankings = self._query_database_num_ratings(cmd)

    # Use pandas to perform the ranking
    df = pd.DataFrame(rankings)
    df["language"] = cmd.language
    df["period"] = cmd.period
    df["start_date"] = cmd.start_date
    df["end_date"] = cmd.end_date

    # Rank on the second column of the query result (presumably the rating
    # count -- verify against _query_database_num_ratings). With
    # method="min" ties share the lowest rank of their group, and
    # ascending=False gives rank 1 to the largest value.
    # `method` is passed by keyword because pandas 2.x no longer accepts it
    # positionally; builtin `int` replaces `np.int`, which was an alias for
    # it and was removed in NumPy 1.24.
    df["rank"] = (
        df[df.columns[1]].rank(method="min", ascending=False).astype(int)
    )

    # Delete existing rankings prior to insert
    query = (
        Ranking.query.filter(Ranking.language == cmd.language)
        .filter(Ranking.end_date == cmd.end_date)
        .filter(Ranking.period == cmd.period)
    )
    query.delete()

    # index=False keeps the DataFrame index out of the inserted records.
    data = iter(df.to_records(index=False))
    batch = from_sqlalchemy_table(Ranking.__table__, data, list(df.columns))
    batch.execute(db.engine.raw_connection)

    yield RankingsUpdated(
        cmd.period, cmd.start_date, cmd.end_date, cmd.language
    )
def handle(self, cmd: ExecuteMahoutRecommender):
    """Run the Mahout recommender for a model and reload its results.

    Steps: look up the model in ``MODELS``, run the Maven ``exec:java``
    job in ``../growser-mahout/``, delete the model's existing
    ``Recommendation`` rows, then bulk-insert the rows read from the
    exported CSV.

    :param cmd: Command whose ``model`` attribute is the key into ``MODELS``.
    :raises ValueError: If ``cmd.model`` does not resolve to a model.
    :raises subprocess.CalledProcessError: If the Mahout process exits with
        a non-zero status. Raised before any existing recommendations are
        deleted, so a failed run cannot wipe the model's rows.
    :returns: ``RecommendationsUpdated`` built from the model id and the
        batch object.
    """
    model = MODELS.get(cmd.model)
    if model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Unknown model: {}".format(cmd.model))

    source = abspath(join(RATINGS_PATH, model.source))
    destination = abspath(join(EXPORT_PATH, model.destination))

    log.info('Running Mahout')
    run = [
        "mvn", "exec:java",
        "-DbatchSize=100",
        "-DmodelID={}".format(model.id),
        "-Dsrc=" + source,
        "-Dout=" + destination,
    ]
    # check_call (not call): a non-zero exit must abort here, before the
    # existing recommendations are deleted below.
    subprocess.check_call(run, cwd="../growser-mahout/")

    Recommendation.query.filter(
        Recommendation.model_id == model.id).delete()

    columns = ['model_id', 'repo_id', 'recommended_repo_id', 'score']
    batch = from_sqlalchemy_table(
        Recommendation.__table__, from_csv(destination), columns)

    for rows in batch.batch_execute(db.engine.raw_connection):
        log.info("Batch complete: {}".format(rows))

    return RecommendationsUpdated(model.id, batch)
def test_errors_invalid_type(self):
    """``from_sqlalchemy_table`` must raise ``TypeError`` when given ``None`` as the table."""
    not_a_table = None
    with self.assertRaises(TypeError):
        from_sqlalchemy_table(not_a_table, get_fake_rows(5), columns)
def test_sqlalchemy_table(self):
    """The bulk object built from ``TestTable`` exposes the requested columns, in order."""
    insert = from_sqlalchemy_table(TestTable, get_fake_rows(5), columns)
    column_names = [column.name for column in insert.columns]
    assert columns == column_names