def run(procId, procCount, startCount=9000, minTagCount=23):
    """Predict every tag that falls into this call's share of the tag list.

    Tags returned by ``trainDocDb.getTagsList()`` are numbered from 1 in
    iteration order.  A tag is processed only when all of the following hold:

    * ``position % procCount == procId``
    * ``position >= startCount``
    * ``trainDocDb.getTagCount(tag) > minTagCount``

    Args:
        procId: Remainder (``position % procCount``) this call is
            responsible for.
        procCount: Modulus used to split the tag list between calls.
        startCount: Tags whose 1-based position is below this value are
            skipped.  Defaults to 9000, the value that used to be
            hard-coded.
        minTagCount: Tags whose count is less than or equal to this value
            are skipped.  Defaults to 23, the value that used to be
            hard-coded.
    """
    connection = PgSQL.connect(user="******", database=DatabaseName)
    memDb = redis.Redis(host='localhost', port=6379)

    trainCfg = DbBuildConfig['train']
    testCfg = DbBuildConfig['test']

    trainDocDb = DocumentsDatabase(
        connection,
        trainCfg['DocTagsTable'],
        trainCfg['RawDocTable'],
        trainCfg['TagsTable'],
        trainCfg['DocumentsTable'],
    )
    testDocDb = DocumentsDatabase(
        connection,
        testCfg['DocTagsTable'],
        testCfg['RawDocTable'],
        testCfg['TagsTable'],
        testCfg['DocumentsTable'],
    )

    trainFeatureDb = FeatureDatabase(
        connection,
        memDb,
        trainDocDb,
        trainCfg['FeaturesTable'],
        trainCfg['DocFeaturesTable'],
        trainCfg['TagSpecificFeatureTable'],
    )
    testFeatureDb = FeatureDatabase(
        connection,
        memDb,
        testDocDb,
        testCfg['FeaturesTable'],
        testCfg['DocFeaturesTable'],
        testCfg['TagSpecificFeatureTable'],
    )

    classifier = Classifier(
        connection,
        trainFeatureDb,
        testFeatureDb,
        ClassifierTableConfig['predictedTrain'],
        ClassifierTableConfig['predictedTest'],
        trainDocDb,
    )

    # Table setup is intentionally disabled in this entry point; the
    # original (commented-out) code was:
    # if procId == 0:
    #     classifier.createTables()
    #     classifier.createTagPredictTables()
    #     classifier.cleanClassificationTables()

    tags = trainDocDb.getTagsList()
    for position, tag in enumerate(tags, 1):
        # Only handle the tags that belong to this call's remainder class.
        if position % procCount != procId:
            continue
        # Skip everything before the resume position.
        if position < startCount:
            continue

        # A single parenthesised expression prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s %s %s" % ("Processing ", tag, " ", position))

        if trainDocDb.getTagCount(tag) <= minTagCount:
            continue
        classifier.predictForTag(tag)
def run():
    """Predict every sufficiently frequent tag and save the results.

    Builds the train/test feature databases and a ``Classifier``, prepares
    the prediction tables, then calls ``classifier.predictForTag`` for each
    tag from ``trainDocDb.getTagsList()`` that has tag-specific features and
    a tag count greater than 25.  The value returned by ``predictForTag`` is
    printed for each processed tag.
    """
    connection = PgSQL.connect(user="******", database=DatabaseName)
    memDb = redis.Redis(host='localhost', port=6379)

    trainCfg = DbBuildConfig['train']
    testCfg = DbBuildConfig['test']

    trainDocDb = DocumentsDatabase(
        connection,
        trainCfg['DocTagsTable'],
        trainCfg['RawDocTable'],
        trainCfg['TagsTable'],
        trainCfg['DocumentsTable'],
    )

    trainFeatureDb = FeatureDatabase(
        connection,
        memDb,
        trainDocDb,
        trainCfg['FeaturesTable'],
        trainCfg['DocFeaturesTable'],
        trainCfg['TagSpecificFeatureTable'],
    )
    # The test feature database is built without a documents database.
    testFeatureDb = FeatureDatabase(
        connection,
        memDb,
        None,
        testCfg['FeaturesTable'],
        testCfg['DocFeaturesTable'],
        testCfg['TagSpecificFeatureTable'],
    )

    classifier = Classifier(
        connection,
        trainFeatureDb,
        testFeatureDb,
        ClassifierTableConfig['predictedTrain'],
        ClassifierTableConfig['predictedTest'],
        trainDocDb,
    )

    # NOTE(review): the source lost its line breaks; only createTables()
    # carried a comment marker, so the two calls below are assumed to be
    # live code -- confirm.
    # classifier.createTables()
    classifier.createTagPredictTables()
    classifier.cleanClassificationTables()

    for tag in trainDocDb.getTagsList():
        # The lookup used to be issued twice per tag with the same argument
        # (once into an unused variable); a single call is enough.
        if not trainFeatureDb.getTagSpecificFeatures(tag):
            continue
        if trainDocDb.getTagCount(tag) <= 25:
            continue

        # A single parenthesised expression prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(classifier.predictForTag(tag))

    # NOTE(review): assumed to run once after the loop rather than per tag;
    # the original indentation is not recoverable from the source -- confirm.
    classifier.saveClassificationResults()