import pandas as pd import random import pyvw from datetime import datetime from sklearn.feature_extraction import FeatureHasher from sklearn.metrics.pairwise import cosine_similarity from itertools import izip import os import cuttsum.judgements from cuttsum.misc import event2semsim import math np.random.seed(42) matches_df = cuttsum.judgements.get_merged_dataframe() tags = enum("READY", "WORKER_START", "WORKER_STOP") class FeatureMapper(dict): def __init__(self): self.store = dict() self._inv_store = dict() self._idx = 0 def __getitem__(self, key): if key not in self.store: self.store[key] = self._idx self._inv_store[self._idx] = key self._idx += 1 return self.store[key] def items(self): return self.store.items() def inv(self, idx):
import pandas as pd
import numpy as np
import random
import pyvw
from datetime import datetime
from sklearn.feature_extraction import FeatureHasher
from sklearn.metrics.pairwise import cosine_similarity
from itertools import izip
import os
import cuttsum.judgements
from cuttsum.misc import enum, event2semsim
import math

# Seed NumPy's global random state with a fixed value.
np.random.seed(42)

matches_df = cuttsum.judgements.get_merged_dataframe()
tags = enum("READY", "WORKER_START", "WORKER_STOP")


class FeatureMapper(dict):
    """Hand out consecutive integer indices to keys on first lookup.

    Looking up ``mapper[key]`` returns the index previously assigned to
    ``key``; an unseen key is assigned the next unused index (starting
    at 0) and that index is returned.

    The mapping lives in ``self.store`` (key -> index) and
    ``self._inv_store`` (index -> key). The methods defined here never
    touch the storage inherited from ``dict``.
    """

    def __init__(self):
        # key -> index
        self.store = dict()
        # index -> key, kept in step with ``store``
        self._inv_store = dict()
        # next index to hand out
        self._idx = 0

    def __getitem__(self, key):
        """Return the index for ``key``, assigning a fresh one if unseen."""
        try:
            return self.store[key]
        except KeyError:
            new_idx = self._idx
            self.store[key] = new_idx
            self._inv_store[new_idx] = key
            self._idx = new_idx + 1
            return new_idx
from mpi4py import MPI
from cuttsum.misc import enum
from datetime import datetime

tags = enum("READY", "DONE", "STOP", "ADD_JOB", "WORKER_START", "WORKER_STOP")


def start_service(service, service_configs):
    """Start the background service named ``service``.

    Args:
        service: Service name. ``"corenlp"`` and any name ending in
            ``"-lm"`` are handled; any other name is ignored.
        service_configs: Mapping from service name to that service's
            settings mapping.

    For ``"corenlp"`` the settings are optional and default to
    ``mem="50G"``, ``threads=25``, ``max_message_len=524288`` and
    ``port=9999``; they are passed to ``corenlp.server.start``.

    For a ``"*-lm"`` service the entry ``service_configs[service]`` must
    exist and contain ``"path"`` and ``"port"``; ``"order"`` defaults to
    3. The value returned by ``cuttsum.srilm.start_lm`` is stored back
    into that settings mapping under ``"pid"``.

    Returns:
        None.
    """
    if service == "corenlp":
        cnlp_config = service_configs.get("corenlp", {})
        mem = cnlp_config.get("mem", "50G")
        threads = int(cnlp_config.get("threads", 25))
        max_message_len = int(cnlp_config.get("max_message_len", 524288))
        port = int(cnlp_config.get("port", 9999))

        # Imported lazily so the dependency is only needed when this
        # service is actually requested.
        import corenlp as cnlp
        cnlp.server.start(
            port=port,
            mem=mem,
            threads=threads,
            max_message_len=max_message_len,
            annotators=[
                "tokenize", "ssplit", "pos", "lemma", "ner", "depparse"],
            corenlp_props={
                "pos.maxlen": "150",
                "ssplit.eolonly": "true"})

    elif service.endswith("-lm"):
        lm_config = service_configs[service]
        path = lm_config["path"]
        port = int(lm_config["port"])
        order = int(lm_config.get("order", 3))

        # Single-argument print() is valid on both Python 2 and 3. The
        # double space before "on" reproduces what the former
        # comma-separated print statement emitted.
        print("starting %s  on port %s" % (service, port))

        import cuttsum.srilm as srilm
        pid = srilm.start_lm(path, order, port)
        # Record the pid on the caller's config mapping.
        lm_config["pid"] = pid
        print("lm is started with pid %s" % (pid,))