Exemple #1
0
import pandas as pd
import random
import pyvw
from datetime import datetime
from sklearn.feature_extraction import FeatureHasher
from sklearn.metrics.pairwise import cosine_similarity
from itertools import izip
import os
import cuttsum.judgements
from cuttsum.misc import event2semsim
import math

# Fix the random seed to the constant 42.
# NOTE(review): `np` is not imported in the visible lines; presumably
# `import numpy as np` is expected -- confirm it exists in the full file.
np.random.seed(42)

# Evaluated once, at import time, by calling into cuttsum.judgements.
# NOTE(review): presumably a pandas DataFrame (going by the name) -- confirm.
matches_df = cuttsum.judgements.get_merged_dataframe()
# Named constants READY / WORKER_START / WORKER_STOP built by `enum`.
# NOTE(review): `enum` is not imported in the visible lines; presumably it is
# `cuttsum.misc.enum` -- confirm.
tags = enum("READY", "WORKER_START", "WORKER_STOP")

class FeatureMapper(dict):
    def __init__(self):
        self.store = dict()
        self._inv_store = dict()
        self._idx = 0
    def __getitem__(self, key):
        if key not in self.store:
            self.store[key] = self._idx
            self._inv_store[self._idx] = key
            self._idx += 1
        return self.store[key]
    def items(self):
        return self.store.items()
    def inv(self, idx):
Exemple #2
0
import pandas as pd
import random
import pyvw
from datetime import datetime
from sklearn.feature_extraction import FeatureHasher
from sklearn.metrics.pairwise import cosine_similarity
from itertools import izip
import os
import cuttsum.judgements
from cuttsum.misc import event2semsim
import math

# Fix the random seed to the constant 42.
# NOTE(review): `np` is not imported in the visible lines; presumably
# `import numpy as np` is expected -- confirm it exists in the full file.
np.random.seed(42)

# Evaluated once, at import time, by calling into cuttsum.judgements.
# NOTE(review): presumably a pandas DataFrame (going by the name) -- confirm.
matches_df = cuttsum.judgements.get_merged_dataframe()
# Named constants READY / WORKER_START / WORKER_STOP built by `enum`.
# NOTE(review): `enum` is not imported in the visible lines; presumably it is
# `cuttsum.misc.enum` -- confirm.
tags = enum("READY", "WORKER_START", "WORKER_STOP")


class FeatureMapper(dict):
    """Assign consecutive integer indices to keys on first lookup.

    The mapping lives in ``self.store`` (key -> index) and
    ``self._inv_store`` (index -> key).

    NOTE(review): although this subclasses ``dict``, the methods here never
    write to the inherited dict storage, so ``len(mapper)`` and
    ``key in mapper`` do not reflect the keys held in ``self.store``.
    """

    def __init__(self):
        """Create an empty mapper whose first assigned index will be 0."""
        # Next index to hand out to a previously unseen key.
        self._idx = 0
        # Forward mapping: key -> index.
        self.store = {}
        # Reverse mapping: index -> key.
        self._inv_store = {}

    def __getitem__(self, key):
        """Return the index for *key*, assigning the next free one if unseen."""
        try:
            return self.store[key]
        except KeyError:
            pass
        # First time this key is seen: give it the next index.
        assigned = self._idx
        self.store[key] = assigned
        self._inv_store[assigned] = key
        self._idx = assigned + 1
        return assigned
Exemple #3
0
from mpi4py import MPI
from cuttsum.misc import enum
from datetime import datetime

# Named constants built by cuttsum.misc.enum.
# NOTE(review): presumably used as MPI message tags, given the mpi4py import
# above -- confirm against the code that sends/receives them.
tags = enum("READY", "DONE", "STOP", "ADD_JOB", "WORKER_START", "WORKER_STOP")

def start_service(service, service_configs):
    """Start the background service called *service*.

    Branches handled here:

    * ``"corenlp"`` -- calls ``corenlp.server.start`` with settings read from
      the optional ``service_configs["corenlp"]`` mapping. Defaults:
      ``mem="50G"``, ``threads=25``, ``max_message_len=524288``,
      ``port=9999``.
    * a name ending in ``"-lm"`` -- calls ``cuttsum.srilm.start_lm`` with the
      ``path``, ``order`` (default 3) and ``port`` taken from
      ``service_configs[service]``, then stores the value it returns in that
      same config mapping under ``"pid"``.

    Args:
        service: Name of the service to start.
        service_configs: Mapping from service name to that service's config
            mapping. The ``-lm`` branch mutates the matching entry.

    Raises:
        KeyError: For an ``-lm`` service with no entry in *service_configs*,
            or whose entry lacks ``"path"`` or ``"port"``.
        ValueError: If a numeric setting cannot be converted with ``int``.
    """
    if service == "corenlp":
        cnlp_config = service_configs.get("corenlp", {})
        mem = cnlp_config.get("mem", "50G")
        threads = int(cnlp_config.get("threads", 25))
        max_message_len = int(cnlp_config.get("max_message_len", 524288))
        port = int(cnlp_config.get("port", 9999))
        # Imported here rather than at module level, so the package is only
        # required when this branch actually runs.
        import corenlp as cnlp
        cnlp.server.start(
            port=port,
            mem=mem, threads=threads, max_message_len=max_message_len,
            annotators=["tokenize", "ssplit", "pos", "lemma", "ner", "depparse"],
            corenlp_props={
                "pos.maxlen": "150",
                "ssplit.eolonly": "true"})
    elif service.endswith("-lm"):
        lm_config = service_configs[service]
        path = lm_config["path"]
        port = int(lm_config["port"])
        order = int(lm_config.get("order", 3))
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function. The
        # double space before "on" matches the original output.
        print("starting %s  on port %s" % (service, port))
        import cuttsum.srilm as srilm
        pid = srilm.start_lm(path, order, port)
        lm_config["pid"] = pid
        print("lm is started with pid %s" % (pid,))