def __init__(self, *args, **kwargs):
        """Initialise the job and load the entity representations.

        All positional and keyword arguments are forwarded unchanged to
        ``MRJob.__init__``.  The entity list is then fetched as JSON from a
        fixed URL, passed through ``toy_kba_algorithm.prepare_entities`` and
        the result is stored on ``self.entity_representations``.
        """
        # Local import so this method does not rely on a file-level import.
        from contextlib import closing

        MRJob.__init__(self, *args, **kwargs)

        ## load entities from json file
        log("loading entity list")
        entities_url = "https://s3.amazonaws.com/trec-kba-2012/entity-urlnames.json"
        # closing() releases the HTTP handle even if JSON parsing raises; the
        # object returned by urllib.urlopen is not guaranteed to be a context
        # manager on its own.
        with closing(urllib.urlopen(entities_url)) as response:
            entities = json.load(response)
        self.entity_representations = toy_kba_algorithm.prepare_entities(entities)
Exemple #2
0
 def __init__(self, *args, **kwargs):
     """Initialise the job and read the config file named on the command line.

     All arguments are forwarded unchanged to ``MRJob.__init__``.  The value
     following ``--config-file`` in ``sys.argv`` is reduced to its file name
     (the text after the last ``/``), that file is opened relative to the
     current working directory, and its lines (line endings included) are
     stored in ``self.lines``.

     Raises:
         ValueError: if ``--config-file`` is missing from ``sys.argv`` or is
             not followed by a value.
     """
     MRJob.__init__(self, *args, **kwargs)

     path = None
     for index, ar in enumerate(sys.argv):
         # Skip a trailing flag that has no value after it.
         if ar == '--config-file' and index + 1 < len(sys.argv):
             # Keep only the base name; when the flag is repeated the last
             # occurrence wins, as before.
             path = sys.argv[index + 1].rsplit('/', 1)[-1]
     if path is None:
         raise ValueError(
             "--config-file <path> must be supplied on the command line")

     # Use a context manager so the handle is closed once the lines are read.
     with open(path, "r", encoding="utf-8") as config_file:
         self.lines = list(config_file)
    def __init__(self, *args, **kwargs):
        """Initialise the job and load the cluster file into memory.

        All arguments are forwarded unchanged to ``MRJob.__init__``.
        ``self.clusters`` and ``self.names`` start out as empty dicts, and
        ``self.cluster_from_file`` receives every line (line endings
        included) of the UTF-8 file named by ``self.options.clusters[0]``.
        """
        MRJob.__init__(self, *args, **kwargs)

        self.clusters = dict()
        self.names = dict()
        # Context manager closes the handle as soon as the lines are read.
        with open(self.options.clusters[0], "r", encoding="utf-8") as cluster_file:
            self.cluster_from_file = list(cluster_file)
    def __init__(self, args):
        """Initialise the job with arguments built from standard input.

        The incoming ``args`` value is discarded.  Every line of
        ``sys.stdin`` is stripped of surrounding whitespace and has each
        double quote escaped as ``\\"``.  For each pair of consecutive lines
        both members are appended, so interior lines appear twice, and the
        resulting list is passed to ``MRJob.__init__`` as the argument list.

        The previous version ended with a ``yield``, which made ``__init__``
        a generator function: none of the body ran on instantiation and
        Python rejects a non-``None`` return from ``__init__``.
        """
        # strip() already removes newlines, so no further trimming is needed
        # after escaping.
        escaped = [raw_line.strip().replace('"', '\\"') for raw_line in sys.stdin]

        paired = []
        for current, following in zip(escaped, escaped[1:]):
            paired.extend((current, following))

        MRJob.__init__(self, paired)
Exemple #5
0
 def __init__(self, *args, **kwargs):
     """Initialise the job and load the cluster and data files into memory.

     All arguments are forwarded unchanged to ``MRJob.__init__``.
     ``self.clusters`` and ``self.names`` start out as empty dicts.
     ``self.cluster_from_file`` receives the lines of ``clusteronly.txt`` and
     ``self.data_lines`` the lines of ``dataonly.txt``; both are read as
     UTF-8 with line endings included.
     """
     MRJob.__init__(self, *args, **kwargs)

     # The returned path is not used below; the call is kept in case it has
     # side effects -- TODO confirm and drop if it does not.
     self.get_cluster_file_path()
     self.clusters = dict()
     self.names = dict()

     # Context managers close each handle as soon as its lines are read.
     with open('clusteronly.txt', "r", encoding="utf-8") as cluster_file:
         self.cluster_from_file = list(cluster_file)
     with open('dataonly.txt', "r", encoding="utf-8") as data_file:
         self.data_lines = list(data_file)
Exemple #6
0
    def __init__(self, *args, **kwargs):
        """Initialise the job with zeroed counts and random probability tables.

        All arguments are forwarded unchanged to ``MRJob.__init__``.  The
        emission and transition tables are filled with uniform random values
        and scaled so that each row sums to 1; the initial and final vectors
        are scaled so that their entries sum to 1.
        """
        MRJob.__init__(self, *args, **kwargs)
        self.initial_counts = np.zeros(num_states)

        # The draws stay in the order emission, initial, final, transition so
        # that a seeded generator yields the same values as before.
        emission = np.random.random((num_observations, num_states))
        self.emission_probabilities = emission / emission.sum(axis=1, keepdims=True)

        initial = np.random.random(num_states)
        self.initial_probabilities = initial / initial.sum()

        final = np.random.random(num_states)
        self.final_probabilities = final / final.sum()

        transition = np.random.random((num_states, num_states))
        self.transition_probabilities = transition / transition.sum(axis=1, keepdims=True)
Exemple #7
0
    def __init__(self, *args, **kwargs):
        """Initialise the job, restoring or creating the HMM, and reset counters.

        All arguments are forwarded unchanged to ``MRJob.__init__``.  If
        ``hmm.pkl`` exists in the working directory, its contents are
        string-escape-decoded and unpickled into ``self.hmm``; otherwise a new
        ``HMM(word_dict, tag_dict)`` is created and randomly initialised.
        The log-likelihood and all count accumulators are set to 0.
        """
        MRJob.__init__(self, *args, **kwargs)

        from os import path
        filename = 'hmm.pkl'
        if path.exists(filename):
            # Read inside a context manager so the handle is always closed.
            with open(filename) as pickle_file:
                escaped = pickle_file.read()
            # SECURITY: pickle.loads can execute arbitrary code; only use this
            # with an 'hmm.pkl' that comes from a trusted source.
            # NOTE: the 'string-escape' codec exists only in Python 2.
            self.hmm = pickle.loads(escaped.decode('string-escape'))
        else:
            # Initialize the HMM parameters randomly.
            self.hmm = HMM(word_dict, tag_dict)
            self.hmm.initialize_random()

        self.log_likelihood = 0
        self.initial_counts = 0
        self.emission_counts = 0
        self.transition_counts = 0
        self.final_counts = 0
Exemple #8
0
    def __init__(self, *args, **kwargs):
        """Set up zeroed initial counts and randomly initialised tables.

        Arguments are passed straight through to ``MRJob.__init__``.  The
        two-dimensional tables (emission, transition) are normalised row by
        row; the one-dimensional vectors (initial, final) are normalised by
        their total.
        """
        MRJob.__init__(self, *args, **kwargs)
        self.initial_counts = np.zeros(num_states)

        # Random draws happen in the same sequence as before (emission,
        # initial, final, transition) to keep seeded runs reproducible.
        emission_shape = (num_observations, num_states)
        self.emission_probabilities = np.random.random(emission_shape)
        emission_row_totals = self.emission_probabilities.sum(1)
        self.emission_probabilities /= emission_row_totals[:, np.newaxis]

        self.initial_probabilities = np.random.random(num_states)
        initial_total = self.initial_probabilities.sum()
        self.initial_probabilities /= initial_total

        self.final_probabilities = np.random.random(num_states)
        final_total = self.final_probabilities.sum()
        self.final_probabilities /= final_total

        transition_shape = (num_states, num_states)
        self.transition_probabilities = np.random.random(transition_shape)
        transition_row_totals = self.transition_probabilities.sum(1)
        self.transition_probabilities /= transition_row_totals[:, np.newaxis]
Exemple #9
0
    def __init__(self, *args, **kwargs):
        """Initialise the job, build the HMM, and reset the accumulators.

        Arguments are passed straight through to ``MRJob.__init__``.  A new
        ``HMM(word_dict, tag_dict)`` is created; its parameters are loaded
        from ``hmm.txt`` via ``load_parameters`` (``smoothing=0.1``) when
        that file exists, and randomly initialised otherwise.  The
        log-likelihood and all count accumulators start at 0.
        """
        from os import path as os_path

        MRJob.__init__(self, *args, **kwargs)

        # Create HMM object.
        self.hmm = HMM(word_dict, tag_dict)

        parameters_file = 'hmm.txt'
        if not os_path.exists(parameters_file):
            # No saved parameters: initialize the HMM randomly.
            self.hmm.initialize_random()
        else:
            # Load the HMM parameters from a text file.
            load_parameters(parameters_file, self.hmm, smoothing=0.1)

        self.log_likelihood = 0
        self.initial_counts = 0
        self.emission_counts = 0
        self.transition_counts = 0
        self.final_counts = 0
Exemple #10
0
 def __init__(self, args):
    """Initialise the job by passing ``args`` positionally to ``MRJob.__init__``."""
    MRJob.__init__(self, args)
 def __init__(self, args):
     """Initialise the job by passing ``args`` to ``MRJob.__init__`` as the ``args`` keyword."""
     MRJob.__init__(self, args=args)
 def __init__(self, *args, **kwargs):
     """Initialise the job and attach the ``hmm`` object to the instance.

     Arguments are passed straight through to ``MRJob.__init__``.  ``hmm`` is
     not a parameter of this method, so it is resolved from the enclosing
     scope and stored as ``self.hmm``.
     """
     MRJob.__init__(self, *args, **kwargs)
     self.hmm = hmm