Example #1
	def add_splitter(self, model_name, receptacle_id):
		splitter_id = self.global_ids.get_next_splitter_id()
		rids = []
		receptacle_count = SPLITTER_MODELS[model_name]
		for r_id in range(receptacle_count):
			rids.append(self.global_ids.get_next_receptacle_id())

		if DEBUG:
			print('Support.add_splitter() receptacle_count:{0}, rids:{1}'.format(receptacle_count, rids))

		sp = Splitter(model_name, splitter_id, rids)

		receptacle = self.get_receptacle_by_id(receptacle_id)
		receptacle.connect_load('SPLITTER', sp)

		self.full_receptacle_ids.append(receptacle_id)
		self.empty_receptacle_ids.remove(receptacle_id)

		r = Results()
		r.set_object_id(splitter_id)
		r.set_next_receptacle_id_from_list(rids)

		self.splitter_ids.append(splitter_id)
		for r_id in rids:
			r1 = sp.get_receptacle_by_id(r_id)
			if r1 is None:
				print('Support.add_splitter sp.get_receptacle_by_id returned None for r1. r_id:{0}, rids:{1}'.format(r_id, rids))
			self.empty_receptacle_ids.append(r_id)

		if DEBUG:
			print('Support.add_splitter(). Final empty_receptacle_ids:{0}'.format(self.empty_receptacle_ids))
			print('Support.add_splitter(). Final full_receptacle_ids:{0}'.format(self.full_receptacle_ids))

		return r
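The method above looks up the receptacle count in a module-level SPLITTER_MODELS mapping and checks a DEBUG flag; a minimal sketch of what those assumed definitions could look like (model names and counts are illustrative, not from the original):

DEBUG = False  # assumed module-level flag consulted by add_splitter()

# Assumed mapping from splitter model name to the number of receptacles it exposes.
SPLITTER_MODELS = {
    'two_way_splitter': 2,
    'four_way_splitter': 4,
}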
Example #2
    def split_patents(self, temp_dir, filename):
        self.logger.info("splitting files")
        xmls = get_files(temp_dir, ".xml")
        splitter = Splitter()

        for file in xmls:
            splitter.split_file(file, join(self.working_directory, self.patentDir, filename))
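split_patents depends on a get_files helper (and os.path.join); a minimal sketch under the assumption that it simply returns the paths of files in a directory with a given extension:

import os
from os.path import join


def get_files(directory, extension):
    # Assumed behaviour: list every file in `directory` whose name ends with `extension`.
    return [join(directory, name)
            for name in sorted(os.listdir(directory))
            if name.endswith(extension)]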
Example #3
def main():
    # Set working directory to project folder
    os.chdir('../.')
    print(os.getcwd())

    # Create Logger File to track all changes
    logger = Logger(os)

    # Create a list of words for parser to ignore
    stop_words = []  #['PSY', 'STAT']

    ninja = Splitter(stop_words)

    ans = input('Do you want to manually input lines? ')

    # Create Messenger Object to ask prompts
    if 'y' in ans or 'Y' in ans:
        messenger = Messenger()

        line = ''
        exit = False

        while not exit:
            line = messenger.collect_input()

            if line == 'quit' or line == 'exit':
                exit = True
            else:
                # Output would be collected here
                print(ninja.split_line(line))
    #else:

    print('Exiting code')
Example #4
def hy_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting 
    task = params['task'][1]
    split_method = 'ratio'
    ratio = 0.8 # provide the ratio
    krun = 10 # run 10 times then average the result
    ### Initialization
    acc = np.zeros(krun, dtype='float32') # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32') # weights parameter
    ### H&Y Reading
    feature = dataio.feature
    patient_info = feature.get_hy_stage(patient_info, patient_array)
    print ('-----')
#    split = Splitter(task, patient_array, ratio, split_method, patient_info)
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        hy_pred = HYPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = hy_pred.train_model()
        param_w[:,k], _ = hy_pred.get_param()
        ### Evaluating
        hy_eval = Evaluator(model, test_data, patient_info, task, hy_pred)
        acc[k] = hy_eval.compute_accuracy()
    print ('-----')
    print ('Accuracy of the %s task: %f' %(task, np.sum(acc)/krun)) 
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'yh')  
Example #5
def pd_prediction(dataio, patient_info, patient_array, params):
       
    ### Task Setting 
    task = params['task'][0] # disease prediction
    split_method = 'cross-validation'
    kfold = 5 # 5-fold validation
    ### Initialization
    auc = np.zeros(kfold, dtype='float32') # evaluation metrics
    ap = np.zeros(kfold, dtype='float32')
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, kfold], dtype='float32')
    print ('-----')
    split = Splitter(task, patient_array, kfold, split_method)
    for k in range(kfold): # each fold, k is the index of test set
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        pd_pred = PDPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = pd_pred.train_model()
        param_w[:,k], _ = pd_pred.get_param()
        ### Evaluating
        pd_eval = Evaluator(model, test_data, patient_info, task, pd_pred)
        auc[k], ap[k] = pd_eval.compute_accuracy()
    print ('-----')
    print ('AUC of the %s task: %f' %(task, np.sum(auc)/kfold))
    print ('Average Precision of the %s task: %f' %(task, np.sum(ap)/kfold))    
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, kfold, 'pd')  
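hy_prediction and pd_prediction both rely on a Splitter whose get_splitter(k) returns a (train_data, test_data) pair, driven either by a holdout ratio reshuffled on each run or by k-fold cross-validation. A minimal sketch of that interface, with the constructor arguments taken from the calls above and the internals assumed:

import numpy as np


class Splitter:
    """Sketch of the splitting interface used above; the real implementation may differ."""

    def __init__(self, task, patient_array, param, split_method):
        self.task = task
        self.patients = np.asarray(patient_array)
        self.param = param            # holdout ratio (e.g. 0.8) or number of folds (e.g. 5)
        self.method = split_method    # 'ratio' or 'cross-validation'

    def get_splitter(self, k):
        n = len(self.patients)
        if self.method == 'ratio':
            rng = np.random.RandomState(k)          # reshuffle differently on each run
            order = rng.permutation(n)
            n_train = int(self.param * n)
            return self.patients[order[:n_train]], self.patients[order[n_train:]]
        # 'cross-validation': fold k is the test set, the remaining folds are training data
        folds = np.array_split(np.arange(n), self.param)
        train_idx = np.concatenate([f for i, f in enumerate(folds) if i != k])
        return self.patients[train_idx], self.patients[folds[k]]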
Example #6
def moca_prediction(dataio, patient_info, patient_array, params):
    ### Task Setting 
    task = params['task'][2]
    split_method = 'ratio'
    ratio = 0.8 # provide the ratio
    krun = 5 # run 5 times then average the result 
    ### Initialization
    rmse = np.zeros(krun, dtype='float32') # evaluation metric
    n_feature = dataio.feature.feature_len
    param_w = np.zeros([n_feature, krun], dtype='float32')
    ### MoCA Reading
    feature = dataio.feature
    patient_info = feature.get_moca_score(patient_info, patient_array)
    print ('-----')
    split = Splitter(task, patient_array, ratio, split_method)
    for k in range(krun):
        ### Data Splitting
        train_data, test_data = split.get_splitter(k)
        ### Model Training 
        moca_pred = MoCAPredictor(k, patient_info, train_data, params['result_path'])
        model, y_pred = moca_pred.train_model()
        param_w[:,k] = moca_pred.get_param()
        ### Evaluating
        moca_eval = Evaluator(model, test_data, patient_info, task, moca_pred)
        rmse[k] = moca_eval.compute_accuracy()
    print ('-----')
    print ('RMSE of the %s task: %f' %(task, np.sum(rmse)/krun))
    ### Displaying Feature (selected by prediction model)
    feature = dataio.feature
    feature.get_pred_feature(param_w, krun, 'moca')
Example #7
 def test_splitter_does_the_map(self):
     areas = read_kml_areas("areas.kml")
     city_spots = [
         {
             "name": "skytower",
             "type": "shopping mall",
             "lat": 17.019690,
             "lng": 51.094880
         },
         {
             "name": "Biedronka close to skytower",
             "type": "small shop",
             "lat": 17.018921,
             "lng": 51.097994
         },
         {
             "name": "Panorama Racławicka",
             "type": "historical building",
             "lat": 17.044462,
             "lng": 51.110171
         },
         {
             "name": "Galeria Dominikańska",
             "type": "shopping mall",
             "lat": 17.040685,
             "lng": 51.108244
         },
     ]
     splitter = Splitter()
     assigned = splitter.split_all_points(city_spots, areas)
     self.assertTrue(len(assigned) == 2)
     self.assertIn('Rynek 1', assigned)
     self.assertIn('Gajowice 1', assigned)
Example #8
 def visitTextFile(self, textfile):
     splitter = Splitter(textfile.filePath, len(self.workers))
     file_split_result = splitter.split()
     self.operations[textfile.id] = FilePartition(textfile.id,
                                                  len(self.workers),
                                                  file_split_result,
                                                  textfile.filePath)
     self._set_collect_count(textfile)
Example #9
 def __init__(self, modelId, runNo, filter_dics, filename, _type, splitter_type):
     super(Tracker, self).__init__()
     self.modelId = modelId
     self.runNo = runNo
     self.run_dir = str(modelId) + '_' + str(runNo)
     self.filename = filename
     self.filters = self.prepare_filters(filter_dics)
     self.type = _type # lazy or eager
     self.splitter = Splitter(splitter_type)
     self.track_data = {}
Example #10
 def __init__(self):
     self.splitter = Splitter()
     self.postagger = POSTagger()
     self.dicttagger = DictionaryTagger([
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
         '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
     ])
Example #11
def process_text(text):
    splitter = Splitter()
    postagger = POSTagger()

    # Split the text into sentences of words
    splitted_sentences = splitter.split(text)

    # Do Parts of Speech tagging on the words
    pos_tagged_sentences = postagger.pos_tag(splitted_sentences)

    # dicttagger and sum_score are assumed to be defined at module level
    dict_tagged_sentences = dicttagger.tag(pos_tagged_sentences)
    return sum_score(dict_tagged_sentences)
Example #12
 def OCRImage(self,
              imageUrl,
              splitLength=None):
     # Call ocr.space API, wait and return list of tweets
     ocrResult = self.ocr_api.ocr_url(imageUrl)
     if splitLength is not None:
         tweetsToSend = Splitter.forTweets(
             ocrResult, splitLength=splitLength
         )  # tweet length limits (280-8-15) user name and brackets
     else:
         tweetsToSend = Splitter.forTweets(ocrResult)
     if DEBUG:
         print(" Tweet chain length {}".format(len(tweetsToSend)))
     return tweetsToSend
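OCRImage delegates the chunking to Splitter.forTweets; a minimal sketch of a word-boundary chunker with that signature, assuming the default limit hinted at in the comment (280 minus user name and brackets) and that the real implementation may differ:

class Splitter:
    @staticmethod
    def forTweets(text, splitLength=257):
        # Assumed behaviour: greedily pack whole words into chunks of at most splitLength characters.
        tweets, current = [], ''
        for word in text.split():
            candidate = (current + ' ' + word).strip()
            if len(candidate) <= splitLength:
                current = candidate
            else:
                if current:
                    tweets.append(current)
                current = word
        if current:
            tweets.append(current)
        return tweets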
Example #13
    def split_by_silence(self):
        '''
        Uses the Splitter class to split the audio by silence
        to get the timestamps and respective filenames of the
        split segments.

        Segments is a list of tuples (filename, (start_time, end_time)).
        '''
        if self.audio_extracted:
            splitter = Splitter(self.audio_file)
        else:
            raise Exception(
                "ERROR: File has not been extracted from video yet.")

        splitter.run()
        self.segments = splitter.get_segments()
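Because segments is documented as a list of (filename, (start_time, end_time)) tuples, downstream code can unpack it directly; a small illustrative loop (hypothetical, assuming it runs in another method of the same class):

        for segment_file, (start_time, end_time) in self.segments:
            print('{}: {:.2f}s to {:.2f}s'.format(segment_file, start_time, end_time))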
Example #14
    def __init__(self, dset_name, net_names, hard_labels, device, exp=None):
        if exp is None:
            exp = 0
            while osp.exists(osp.join(cfg.DATA_DIR, 'exp_' + str(exp))):
                exp += 1

        self.exp_dir = osp.join(cfg.DATA_DIR, 'exp_' + str(exp))
        self.num_exp = exp

        dset_dir = osp.join(self.exp_dir, dset_name)
        self.splitting_dir = osp.join(dset_dir, cfg.SPLITTING_DIR)
        self.feat_dir = osp.join(dset_dir, cfg.FEATURE_DIR)
        self.label_dir = osp.join(dset_dir, cfg.LABEL_DIR,
                                  'hard' if hard_labels else 'soft')
        self.net_dir = osp.join(dset_dir, cfg.NET_DIR,
                                'hard' if hard_labels else 'soft')
        self.res_dir = osp.join(dset_dir, cfg.RESULT_DIR,
                                'hard' if hard_labels else 'soft')

        self.dset = cfg.DSETS[dset_name]

        self.splitter = Splitter(self.dset, self.splitting_dir)
        self.extractor = Extractor(self.dset, self.splitting_dir,
                                   self.feat_dir, net_names, device)
        self.augmenter = Augmenter(self.dset, self.splitting_dir,
                                   self.feat_dir, self.label_dir, net_names,
                                   hard_labels)
        self.trainer = Trainer(self.dset, self.label_dir, self.net_dir,
                               self.res_dir, net_names, hard_labels, device)
Example #15
def run_splitter(batch):
    dataset = Splitter(
        batch,
        annotations=args["--annotations"],
        labels=args["--labels"],
        overlap=args["--overlap"],
        duration=args["--duration"],
        output_directory=args["--output_directory"],
    )

    dataloader = torch.utils.data.DataLoader(
        dataset,
        # batch_size=batch_size,
        batch_size=1,
        shuffle=False,
        num_workers=args["--cores_per_node"],
        collate_fn=dataset.collate_fn,
    )

    start = timer()
    outputs = []
    for data in dataloader:
        for out in data:
            outputs.append(out)
    end = timer()

    print("DEBUG: end - start", end - start)

    return outputs
Example #16
    def __call__(self, _, *, audio_paths=[]):
        batch_size = len(audio_paths)
        if batch_size == 0:
            return []

        dataset = Splitter(
            audio_paths,
            annotations=args["--annotations"],
            labels=args["--labels"],
            overlap=args["--overlap"],
            duration=args["--duration"],
            output_directory=args["--output_directory"],
        )

        dataloader = torch.utils.data.DataLoader(
            dataset,
            # batch_size=batch_size,
            batch_size=1,
            shuffle=False,
            num_workers=args["--cores_per_node"],
            collate_fn=dataset.collate_fn,
        )

        start = timer()
        outputs = []
        for idx, data in enumerate(dataloader):
            for out in data:
                outputs.append(out)
        end = timer()
        print("DEBUG: end - start", end - start)

        return outputs
Example #17
    def build_tree(self, X, y, sample_weight, class_distribution, level):
        # Need node weight for counting feature importances and probability of classes
        node_weight = np.sum(class_distribution)

        # node has only one class
        if np.unique(y).shape[0] == 1:
            proba = class_distribution / node_weight
            return Tree(info=proba, is_leaf=True)

        # node has less than min_samples_split samples
        if y.shape[0] < self.min_samples_split:
            proba = class_distribution / node_weight
            return Tree(info=proba, is_leaf=True)

        # node has less than 2*min_samples_leaf samples, so children would have less than min_sample_leaf samples
        if y.shape[0] < 2 * self.min_samples_leaf:
            proba = class_distribution / node_weight
            return Tree(info=proba, is_leaf=True)

        # tree has max_depth depth
        if self.max_depth is not None:
            if level == self.max_depth:
                proba = class_distribution / node_weight
                return Tree(info=proba, is_leaf=True)

        splitter = Splitter()
        feature, threshold, split_pos, index = splitter.find_best_split(X, y, sample_weight,
                                                                        class_distribution, self.min_samples_leaf)
        if feature is None:
            proba = class_distribution / node_weight
            return Tree(info=proba, is_leaf=True)

        gain = splitter.impurity_gain
        left_distribution = splitter.left_distribution
        right_distribution = splitter.right_distribution

        self.feature_importances_[feature] += gain * (float(node_weight) / self.total_weight)
        X = X[index]
        y = y[index]
        sample_weight = sample_weight[index]

        left_tree = self.build_tree(X[0:split_pos], y[0:split_pos], sample_weight[0:split_pos],
                                    left_distribution, level+1)
        right_tree = self.build_tree(X[split_pos:], y[split_pos:], sample_weight[split_pos:],
                                     right_distribution, level+1)
        return Tree(left_tree, right_tree, Predicate(feature, threshold), class_distribution)
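build_tree returns leaves whose info is a class-probability vector and internal nodes built from Predicate(feature, threshold); a minimal sketch of how such a tree might be walked for prediction, assuming (not shown in the source) that internal nodes expose left, right and the predicate as info, and that leaves expose is_leaf and info:

def predict_proba(node, x):
    # Attribute names are assumed from the Tree(...) calls above; the real class may differ.
    while not node.is_leaf:
        predicate = node.info                  # Predicate(feature, threshold) at internal nodes
        if x[predicate.feature] <= predicate.threshold:
            node = node.left
        else:
            node = node.right
    return node.info                           # class-probability vector at the leaf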
Example #18
def processQuestion(gloveModel,
                    question,
                    minLen=1,
                    maxLen=3,
                    useAPI=False,
                    useSynonyms=False):
    tagger = POSTagger()
    pos = tagger.parse(question)
    # create splitter and generalizer
    splitter = Splitter()
    if question[-1] == '?' or question[-1] == '.':
        question = question[:-1]
    gen_question = splitter.generalize(question, pos)
    labels = []
    resultsExists = False
    if not useAPI:
        parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    else:
        resultsExists = True
        apiResult, _ = api.getBinaryRelations(question)
        parts = [
            rel.predicate for rel in apiResult
            if len(rel.predicate_positions_) > 1
        ]
        for part in parts:
            if len(part.split()) > 1:
                labels.append(part.split()[0] +
                              ''.join(''.join([w[0].upper(), w[1:].lower()])
                                      for w in part.split()[1:]))
        if useSynonyms:
            predicates = [max(part.split(), key=len) for part in parts]
            if predicates is not None and len(predicates) > 0:
                for predicate in predicates:
                    for part in list(parts):
                        if predicate in part:
                            for syn in gloveModel.gloveModel.most_similar(
                                    predicate.lower()):
                                parts.append(part.replace(predicate, syn[0]))
        if len(parts) == 0:
            resultsExists = False
            parts = list(splitter.split(gen_question, min=minLen, max=maxLen))
    # create embedder part
    vectors = []
    for part in parts:
        vectors.append(gloveModel.getVector(part))
    return vectors, parts, pos, gen_question, labels, resultsExists
Example #19
 def __init__(self):
     self.sentences = []
     self.abbreviation = {}
     self.load_data()
     self.load_abbrv()
     self.normalizer = Normalizer()
     self.splitter = Splitter()
     self.corrector = Filter()
     self.lemmatizer = WordNetLemmatizer()
     self.missing_apostrophe_vocab = [
         'isnt', 'arent', 'wasnt', 'werent', 'wont', 'dont', 'didnt',
         'doesnt', 'couldnt', 'shouldnt', 'hasnt', 'havent', 'hadnt'
     ]
     self.tokenizer_mistake_vocab = [
         'isn', 'aren', 'wasn', 'weren', 'won', 'don', 'didn', 'doesn',
         'couldn', 'shouldn', 'hasn', 'haven', 'hadn'
     ]
     self._norm = joblib.load('model.crfsuite')
Example #20
    def assemble2(self):
        """
        Builder method: build a Chain of linked Components
        :return:
        """
        log.info('Assembling Chain: %s...' % self.chain_str)

        # Create linked list of input/filter/output (ETL Component) objects
        chain_str = self.chain_str
        split_comps = []
        while chain_str:
            chain_str = chain_str.strip()

            # Check and handle Splitter construct
            # e.g. input_xml_file |(transformer_xslt|output_file) (output_std) (transformer_xslt|output_std)
            if chain_str.startswith('('):
                etl_section_name, chain_str = chain_str.split(')', 1)
                etl_section_name = etl_section_name.strip('(')

                # Check for subchain (split at Filter level)
                if '|' in etl_section_name:
                    # Have subchain: use Chain to assemble
                    sub_chain = Chain(etl_section_name, self.config_dict)
                    sub_chain.assemble2()
                    child_comp = sub_chain.first_comp
                else:
                    # Single component (Output) to split
                    child_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

                # Assemble Components (can be subchains) for Splitter later
                split_comps.append(child_comp)
                if '(' in chain_str:
                    # Still components (subchains) to assemble for Splitter
                    continue

            if len(split_comps) > 0:
                # Next component is Splitter with children
                etl_comp = Splitter(self.config_dict, split_comps)
                split_comps = []
            else:

                # "Normal" case: regular Components piped in Chain
                if '|' in chain_str:
                    # More than one component in remaining Chain
                    etl_section_name, chain_str = chain_str.split('|', 1)
                else:
                    # Last element, we're done!
                    etl_section_name = chain_str
                    chain_str = None

                # Create the ETL component by name and properties
                etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

            # Add component to end of Chain
            self.add(etl_comp)
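The parenthesised construct handled above fans one input out to several sub-chains under a single Splitter; a hypothetical usage with the chain string taken from the comment in the method (section names are assumed to exist in config_dict):

# 'input_xml_file' feeds a Splitter whose children are the sub-chain
# 'transformer_xslt|output_file' and the single output 'output_std'.
chain = Chain('input_xml_file |(transformer_xslt|output_file) (output_std)', config_dict)
chain.assemble2()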
Example #21
 def __init__(self):
     self.splitter = Splitter()
     self.postagger = POSTagger()
     self.dicttagger = DictionaryTagger(
         [
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
         ]
     )
Example #22
 def __init__(self, dataset: ds.Dataset):
     self.loader = Splitter(batch_size=32).get_all(dataset)
     self.dataset_name = str(dataset)
     self.features = []
     self.labels = []
     self.pos_class = dataset.pos_class
     print(f'Encoding: {self.dataset_name}')
     if not os.path.exists('data'):
         os.makedirs('data')
     if not os.path.exists('data/encoded/'):
         os.makedirs('data/encoded/')
     self.root_dir = 'data/encoded/'
Example #23
  def __init__(self, memoryFile):
    self.nCycles = 0 # Used to hold number of clock cycles spent executing instructions
    
    self.dataMemory = DataMemory(memoryFile)
    self.instructionMemory = InstructionMemory(memoryFile)
    self.registerFile = RegisterFile()
    self.alu = ALU()
    self.mainControl = MainControl()
    self.splitter = Splitter()
    self.signExtender = SignExtender()
    self.andGate = AndGate()
    self.breaker = Breaker()

    self.constant4 = Constant(4)
    # self.randomControl = RandomControl()
    self.pcMux1 = Mux()
    self.pcMux2 = Mux()
    self.regMux = Mux()
    self.aluMux = Mux()
    self.resultMux = Mux()
    self.luiMux = Mux()

    self.adder = Add()
    self.branchAdder = Add()

    self.jumpAddress = JMPAddress()
    self.shiftBranch = LeftShiftTwo()
    self.shiftJump = LeftShiftTwo()

    self.pc = PC(hex(0xbfc00000))  # hard coded "boot" address
    
    self.elements = [self.constant4, self.adder, self.instructionMemory, self.breaker, self.splitter,
                     self.shiftJump, self.mainControl, self.regMux, self.signExtender, self.luiMux, self.registerFile,
                     self.jumpAddress, self.shiftBranch, self.branchAdder, self.aluMux, self.alu, self.dataMemory,
                     self.andGate, self.pcMux1, self.pcMux2, self.resultMux, self.registerFile, self.pc]
    
    self._connectCPUElements()
Example #24
class SentimentAnalyzingService(object):
    def __init__(self):
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger([
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
            '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml'
        ])

    def valueOf(self, sentiment):
        if sentiment == 'positive': return 1
        if sentiment == 'negative': return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        if not sentence_tokens:
            return acum_score
        else:
            current_token = sentence_tokens[0]
            tags = current_token[2]
            token_score = sum([self.valueOf(tag) for tag in tags])
            if previous_token is not None:
                previous_tags = previous_token[2]
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            return self.sentence_score(sentence_tokens[1:], current_token,
                                       acum_score + token_score)

    def sentiment_score(self, dictTaggedSentences):
        return sum([
            self.sentence_score(sentence, None, 0.0)
            for sentence in dictTaggedSentences
        ])

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        sentences = self.splitter.splitParagraphToListOfSentences(
            textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)

        score = self.sentiment_score(dict_tagged_sentences)
        return score
Example #25
class SentimentAnalyzingService(object):
    def __init__(self):
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger(
            [
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
            ]
        )

    def valueOf(self, sentiment):
        if sentiment == "positive":
            return 1
        if sentiment == "negative":
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        if not sentence_tokens:
            return acum_score
        else:
            current_token = sentence_tokens[0]
            tags = current_token[2]
            token_score = sum([self.valueOf(tag) for tag in tags])
            if previous_token is not None:
                previous_tags = previous_token[2]
                if "inc" in previous_tags:
                    token_score *= 2.0
                elif "dec" in previous_tags:
                    token_score /= 2.0
                elif "inv" in previous_tags:
                    token_score *= -1.0
            return self.sentence_score(sentence_tokens[1:], current_token, acum_score + token_score)

    def sentiment_score(self, dictTaggedSentences):
        return sum([self.sentence_score(sentence, None, 0.0) for sentence in dictTaggedSentences])

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        sentences = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)

        score = self.sentiment_score(dict_tagged_sentences)
        return score
Example #26
def attempt_dd_improvement(main_tour, best_length, other_tour, other_length):
    segments = Splitter(main_tour, other_tour).get_segments()
    positive_kmoves, negative_kmoves = segments_to_beneficial_kmoves(xy, segments, main_tour)
    # Now that we have independent kmoves, it can be very expensive to try all possible combinations.
    # So for efficiency's sake we try high-yield (supposedly) simple combinations:
    # 1. Try all beneficial kmoves at once. Quit if succeeds (cannot get better).
    # 2. Try each beneficial kmove sequentially, starting from highest-gain.
    # 3. Exclude negative k-moves sequentially, starting from highest-loss.
    naive_gain = best_length - other_length
    if positive_kmoves:
        max_positive_gain = sum([x[0] for x in positive_kmoves])
        if max_positive_gain < naive_gain:
            print('MAX POSITIVE GAIN < NAIVE GAIN')
            main_tour = other_tour
            best_length = other_length
            return main_tour, best_length
        all_positive = combine_segment_array([x[1] for x in positive_kmoves])
        test_tour = perform_kmove(main_tour, all_positive)
        if len(test_tour) == len(main_tour):
            main_tour = test_tour
            print('Trying all {} positive kmoves together worked; gain: {} (naive gain: {})'.format(len(positive_kmoves), max_positive_gain, naive_gain))
            assert(max_positive_gain >= naive_gain)
            assert(max_positive_gain > 0)
            return main_tour, best_length - max_positive_gain
        # There may be cases where naive gain is more than decomposed gains:
        # decomposed gains currently only return moves that can be independently performed.
        # Infeasible moves that are improvements, but can only become feasible when combined with other moves
        # (a potentially computationally expensive search) will be excluded from the decomposed moves.
        dd_gain = 0 # gain due to decomposed kmoves.
        for k in positive_kmoves:
            print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
            test_tour = perform_kmove(main_tour, k[1])
            if len(test_tour) == len(main_tour):
                main_tour = test_tour
                best_length -= k[0]
                dd_gain += k[0]
        if naive_gain > dd_gain:
            print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
            main_tour = other_tour
            best_length = other_length
        if dd_gain > 0 and dd_gain > naive_gain:
            print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))
    elif naive_gain > 0:
        print('NAIVE GAIN > 0 WITH NO DD POSITIVE GAIN')
        main_tour = other_tour
        best_length = other_length
    return main_tour, best_length
Example #27
def perturbed_hill_climb(xy, tour):
    tries = 0
    success = 0
    best_length = tour_util.length(xy, tour)
    while True:
        new_tour, naive_new_length = two_opt.optimize(xy, tour_util.double_bridge(tour)) # double bridge
        #test_tour = tour[:]
        #random.shuffle(test_tour)
        #new_tour, naive_new_length = two_opt.optimize(xy, test_tour) # random restart
        segments = Splitter(tour, new_tour).get_segments()
        kmoves = segments_to_beneficial_kmoves(xy, segments, tour)
        max_gain = 0
        if kmoves:
            max_gain = sum([k[0] for k in kmoves])
        naive_gain = best_length - naive_new_length
        # There may be cases where naive gain is more than decomposed gains:
        # decomposed gains currently only return moves that can be independently performed.
        # Infeasible moves that are improvements, but can only become feasible when combined with other moves
        # (a potentially computationally expensive search) will be excluded from the decomposed moves.
        dd_gain = 0 # gain due to decomposed kmoves.
        if kmoves:
            for k in kmoves:
                print('    trying {}-opt move with gain {}'.format(len(k[1]['adds']), k[0]))
                test_tour = perform_kmove(tour, k[1])
                if len(test_tour) == len(tour):
                    tour = test_tour
                    best_length -= k[0]
                    dd_gain += k[0]
        if naive_gain > dd_gain:
            print('naive_gain ({}) greater than dd_gain ({})'.format(naive_gain, dd_gain))
            tour = new_tour
            best_length = naive_new_length
        if naive_gain > 0 or dd_gain > 0:
            success += 1
        if dd_gain > 0 and dd_gain > naive_gain:
            print('    dd gain {} greater than naive gain {}'.format(dd_gain, naive_gain))
        tries += 1
        current_length = basic.tour_length(xy, tour)
        assert(best_length == current_length)
        if current_length <= TARGET_LENGTH:
            break
        print('current best: {} (iteration {}), improvement rate: {}'.format(best_length, tries, success / tries))
Example #28
    def assemble(self):
        """
        Builder method: build a Chain of linked Components
        :return:
        """
        log.info('Assembling Chain: %s...' % self.chain_str)

        # Create linked list of input/filter/output (ETL Component) objects
        chain_str_arr = self.chain_str.split('|')

        for etl_section_name in chain_str_arr:

            # Check for splitting outputs construct using '+'
            # TODO: may also construct combining Inputs or split to multiple sub-Chains
            # for now only Outputs supported for splitting
            if '+' in etl_section_name:
                section_names = etl_section_name.split('+')

                log.info('Splitting to: %s' % etl_section_name)
                child_comps = []
                for section_name in section_names:

                    if '(' in section_name and ')' in section_name:
                        section_name = section_name.replace(',', '|')
                        section_name = section_name.strip('(')
                        section_name = section_name.strip(')')

                    # Create the child ETL component by name and properties
                    child_comp = factory.create_obj(self.config_dict, section_name.strip())
                    child_comps.append(child_comp)
                etl_comp = Splitter(self.config_dict, child_comps)
            else:

                # Create the ETL component by name and properties
                etl_comp = factory.create_obj(self.config_dict, etl_section_name.strip())

            # Add component to end of Chain
            self.add(etl_comp)
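This variant splits on '+' instead of parenthesised groups; an illustrative chain string for it (section names are hypothetical):

# 'output_file + output_std' is split on '+' and both outputs become children of one Splitter.
chain = Chain('input_xml_file|transformer_xslt|output_file + output_std', config_dict)
chain.assemble()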
Example #29
def squishySplineIk(startLoc, endLoc):
    ikJoints = list()
    startJoint = pmc.createNode('joint')
    adv.alignObjects(startJoint, startLoc)

    endJoint = pmc.createNode('joint')
    adv.alignObjects(endJoint, endLoc)
    pmc.parent(endJoint, startJoint)

    startJoint.orientJoint('xzy', secondaryAxisOrient='zup')
    pmc.makeIdentity(endJoint, apply=True, jointOrient=True)

    Splitter.doSplit(startJoint, 10)

    ikJoints.append(startJoint)
    ikJoints.extend(reversed(startJoint.getChildren(ad=True, type='joint')))

    for i, ikj in enumerate(ikJoints):
        ikj.radius.set(2)
        ikj.rename('ikj_spine{0:02d}'.format(i))

    # Create second set of joints
    rigJoints = adv.makeDuplicateJoints(joints=ikJoints, search='ikj_', replace='local_rig_', connectBone=False)
    # HACK I haven't figured out how to create SDK nodes procedurally,
    # so making some dummy locs to make the curve I need
    a = pmc.createNode('transform')
    b = pmc.createNode('transform')

    pmc.setKeyframe(a.ty, t=0, value=2.5, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.ty, t=10, value=0, inTangentType='flat', outTangentType='flat')
    pmc.keyTangent(a.ty, index=[0], inAngle=0)
    pmc.keyTangent(a.ty, index=[1], inAngle=-30)
    pmc.keyTangent(a.ty, index=[0], outAngle=0)
    pmc.keyTangent(a.ty, index=[1], outAngle=-30)

    animSquashCurve = a.ty.listConnections()[0]
    animSquashCurve.output.disconnect(a.ty)
    animSquashCurve.rename('squash_ramp')

    pmc.setKeyframe(a.tx, t=0, value=0, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.tx, t=5, value=1, inTangentType='flat', outTangentType='flat')
    pmc.setKeyframe(a.tx, t=10, value=0, inTangentType='flat', outTangentType='flat')

    animTwistCurve = a.tx.listConnections()[0]
    animTwistCurve.output.disconnect(a.tx)
    animTwistCurve.rename('twist_ramp')

    pmc.delete(a, b)

    animControls = dict()
    animControls['lower_spine'] = adv.makeControlNode('ctl_lower_spine', targetObject=rigJoints[2], alignRotation=False)
    animControls['middle_spine'] = adv.makeControlNode('ctl_middle_spine')
    animControls['upper_spine'] = adv.makeControlNode('ctl_upper_spine', targetObject=rigJoints[-2],
                                                      alignRotation=False)

    animControls['lower_spine'][0].rotateOrder.set(adv.ROO_YXZ)
    animControls['middle_spine'][0].rotateOrder.set(adv.ROO_YXZ)
    animControls['upper_spine'][0].rotateOrder.set(adv.ROO_YXZ)

    pmc.pointConstraint(animControls['lower_spine'][0], animControls['upper_spine'][0],
                        animControls['middle_spine'][1], mo=False)

    pmc.orientConstraint(animControls['lower_spine'][0], animControls['upper_spine'][0],
                         animControls['middle_spine'][1], mo=False)

    splineIk = pmc.ikHandle(sj=ikJoints[0], ee=ikJoints[-1], sol='ikSplineSolver', parentCurve=False,
                            createCurve=True, simplifyCurve=True, numSpans=2, rootOnCurve=False, n='sik_spine')

    splineIkHandle = splineIk[0]
    spline = splineIk[2]
    spline.rename('crv_spine')
    clusterJoints = list()
    clusterJoints.append(pmc.createNode('joint', n='clj_spine0'))
    pmc.parentConstraint(animControls['lower_spine'][0], clusterJoints[-1])

    clusterJoints.append(pmc.createNode('joint', n='clj_spine1'))
    pmc.parentConstraint(animControls['middle_spine'][0], clusterJoints[-1])

    clusterJoints.append(pmc.createNode('joint', n='clj_spine2'))
    pmc.parentConstraint(animControls['upper_spine'][0], clusterJoints[-1])

    pmc.skinCluster(clusterJoints, spline, maximumInfluences=3)

    pmc.parentConstraint(animControls['lower_spine'][0], ikJoints[0], maintainOffset=True)

    for clj in clusterJoints:
        clj.radius.set(3)

    splineIkHandle.dTwistControlEnable.set(1)
    splineIkHandle.dWorldUpType.set(4)
    splineIkHandle.dWorldUpAxis.set(0)
    splineIkHandle.dWorldUpVector.set([0.0, 0.0, 1.0])
    splineIkHandle.dWorldUpVectorEnd.set([0.0, 0.0, 1.0])

    animControls['lower_spine'][0].worldMatrix[0].connect(splineIkHandle.dWorldUpMatrix)
    animControls['upper_spine'][0].worldMatrix[0].connect(splineIkHandle.dWorldUpMatrixEnd)

    normalizeNode = stretchySplineIk(splineIkHandle, useScale=True, globalScaleAttr='ctl_main.size')
    sqrtScale = pmc.createNode('multiplyDivide', n='sqrt_spine_scale')
    sqrtScale.operation.set(3)
    sqrtScale.input2X.set(0.5)
    normalizeNode.outputX.connect(sqrtScale.input1X)

    invScale = pmc.createNode('multiplyDivide', n='div_spine_inverse_scale')
    invScale.operation.set(2)
    invScale.input1X.set(1.0)
    sqrtScale.outputX.connect(invScale.input2X)

    jointGroups = list()
    for i, jnt in enumerate(rigJoints):
        preTransform = adv.zeroOut(jnt, 'pre')
        jointGroups.append(preTransform)

        ikNode = adv.zeroOut(jnt, 'hlp_ik')
        pmc.pointConstraint(ikJoints[i], ikNode)
        pmc.orientConstraint(ikJoints[i], ikNode)

        twistNode = adv.zeroOut(jnt, 'hlp_twist')
        twistCache = pmc.createNode('frameCache', n='frm_{0}_twist'.format(jnt))
        animTwistCurve.output.connect(twistCache.stream)
        twistCache.varyTime.set(i)

        rotateMultiplier = pmc.createNode('multiplyDivide', n='mul_{0}_twist'.format(jnt.shortName()))

        twistCache.varying.connect(rotateMultiplier.input2X)
        animControls['middle_spine'][0].rotateY.connect(rotateMultiplier.input1X)
        rotateMultiplier.outputX.connect(twistNode.rotateX)

        volumeCache = pmc.createNode('frameCache', n='frm_{0}_volume'.format(jnt.shortName()))
        animSquashCurve.output.connect(volumeCache.stream)
        volumeCache.varyTime.set(i)

        pow_ = pmc.createNode('multiplyDivide', n='pow_{0}'.format(jnt.shortName()))
        pow_.operation.set(3)
        invScale.outputX.connect(pow_.input1X)
        volumeCache.varying.connect(pow_.input2X)
        pow_.outputX.connect(jnt.scaleY)
        pow_.outputX.connect(jnt.scaleZ)

    pmc.group(animControls['lower_spine'][1], animControls['upper_spine'][1], animControls['middle_spine'][1],
              n='grp_spine_anim')
    pmc.group(splineIkHandle, spline, n='grp_spine_rig_systems')
    pmc.group(clusterJoints, startJoint, n='grp_spine_system_joints')
    pmc.group(jointGroups, n='grp_spine_bind_joints')

    return rigJoints
Example #30
""" This splitter attempts to maximize apparent total surplus """
from splitter import Splitter
from splitter import Bid

class SurplusMaximizer(Splitter):
    def score(self, bid, averages):
        return bid.amount - averages[bid.item]

items = ["Room 1", "Room 2", "Room 3"]
bids = [Bid("Room 1", "Joey", 10), Bid("Room 1", "Josh", 15),
        Bid("Room 2", "Joey", 5), Bid("Room 2", "Josh", 0)]
s = SurplusMaximizer()  # use the surplus-maximizing splitter defined above
print(s.split(items, ["Joey", "Josh"], bids))
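Assuming averages maps each item to the mean of the bids placed on it, the surplus scores for the bids above work out as follows (a worked illustration, not output from the original script):

# Room 1 bids: Joey 10, Josh 15  ->  average 12.5
# Room 2 bids: Joey 5,  Josh 0   ->  average 2.5
# score(bid, averages) = bid.amount - averages[bid.item]:
#   Josh on Room 1: 15 - 12.5 = +2.5      Joey on Room 1: 10 - 12.5 = -2.5
#   Joey on Room 2:  5 - 2.5  = +2.5      Josh on Room 2:  0 - 2.5  = -2.5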
Example #31
from main import Main
from data_set import Data_Set
from dummy_master import Dummy_Master
from regressor import Regressor
from metrics import Metrics
from back_elimination import Back_Eliminations
from set_reader import Set_Reader
from splitter import Splitter
from plot import Plot
from process_data import Pre_Process_Data

# import visual-python  # not valid Python syntax (module names cannot contain '-'); left disabled

m = Main('init')
r = Regressor()
sp = Splitter()
mt = Metrics()
m.print()
be = Back_Eliminations()
pd = Pre_Process_Data()

sr = Set_Reader()
sr.read_files()
# sr.print_files_shapes();
train = sr.get_train()
test = sr.get_test()

ploter = Plot()
ploter.cut_survived(train, test)
# ploter.plot_set_survived(sr.get_train(), "Sex", "Survived");
# ploter.plot_set_survived(sr.get_train(), "Pclass" ,"Survived");
Example #32
                          parameters['language'],
                          parameters['path_to_fmridata'],
                          input_path,
                          logger=logs,
                          **kwargs)
    logs.validate()

    logs.info("Retrieve arguments for each model...")
    kwargs_splitter = get_splitter_information(parameters)
    kwargs_compression = get_compression_information(parameters)
    kwargs_transformation = get_data_transformation_information(parameters)
    kwargs_estimator_model = get_estimator_model_information(parameters)
    logs.validate()

    logs.info("Instantiations of the classes...")
    splitter = Splitter(**kwargs_splitter)
    compressor = Compressor(**kwargs_compression)
    transformer = Transformer(**kwargs_transformation)
    estimator_model = EstimatorModel(**kwargs_estimator_model)
    logs.validate()

    logs.info("Defining Pipeline flow...")
    ## Pipeline
    splitter_cv_external = Task([splitter.split], name='splitter_cv_external')
    compressor_external = Task([compressor.compress],
                               input_dependencies=[splitter_cv_external],
                               name='compressor_external',
                               flatten_inputs=[True])
    transform_data_external = Task(
        [transformer.make_regressor, transformer.scale],
        input_dependencies=[splitter_cv_external, compressor_external],
Example #33
class SplitterTest(TestCase):
    def setUp(self):
        self.s = Splitter()

    def test_returns_none_when_loot_is_undivisible_by_number_of_pirates(self):
        self.assertEqual(None,self.s.split([2,3],2))
        
    def test_returns_none_when_there_are_not_enough_gems(self):
        self.assertEqual(None,self.s.split([4,2],3))
        
    def test_returns_none_when_there_is_a_gem_greater_than_share(self):
        self.assertEqual(None,self.s.split([4,2,3],3))
        
    def test_everybody_gets_the_same_kind_of_bucket_when_we_have_only_one_type_of_gem(self):
        self.assertEqual([[2],[2]],self.s.split([2,2],2))
        self.assertEqual([[2],[2],[2]],self.s.split([2,2,2],3))
        self.assertEqual([[2,2],[2,2]],self.s.split([2,2,2,2],2))
        
    def test_everybody_gets_the_same_kind_of_bucket_when_we_have_the_same_set_of_gem_and_pirates(self):
        self.assertEqual([[3,2],[3,2]],self.s.split([3,2,3,2],2))
        self.assertEqual([[3,2],[3,2],[3,2]],self.s.split([3,2,3,2,3,2],3))
        
    def test_everybody_gets_the_same_value_with_a_different_number_of_gems(self):
        self.assertEqual([[3],[2,1]],self.s.split([1,2,3],2))
        self.assertEqual([[3,2,2,2],[3,2,2,2],[3,2,2,2]],self.s.split([3,3,3,2,2,2,2,2,2,2,2,2],3)) # famous "death case"
        self.assertEqual([[7],[5,2]],self.s.split([7,5,2],2))
    
    def test_should_not_create_a_bucket_greater_than_share(self):
        self.assertEqual([[13,1],[7,7]],self.s.split([13,7,7,1],2))
        
    def test_should_rollback_when_the_first_decision_doesnt_fit(self):
        self.assertEqual([[7, 2, 2],[3, 3, 3, 2]],self.s.split([7, 3, 3, 3, 2, 2, 2],2))
        
    def test_should_rollback_when_the_second_decision_doesnt_fit_also(self):
        self.assertEqual([[7, 2, 2, 2],[3, 3, 3, 2, 2]],self.s.split([7, 3, 3, 3, 2, 2, 2, 2, 2],2))
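The first three tests above pin down the guard conditions of split(loot, pirates); a minimal sketch of just those checks, derived from the expected return values (the backtracking that actually builds the equal-value buckets is not sketched here):

    def split(self, loot, pirates):
        total = sum(loot)
        share = total // pirates
        if total % pirates != 0:     # loot value not divisible by the number of pirates
            return None
        if len(loot) < pirates:      # not enough gems for every pirate to get one
            return None
        if max(loot) > share:        # a single gem is worth more than one share
            return None
        ...  # backtracking assignment of gems into equal-value buckets (not shown)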
        
        
Example #34
 def setUp(self):
     self.s = Splitter()
Example #35
import os
import pandas as pd

from splitter import Splitter


def merge_all():
    for i, fname in enumerate(os.listdir('output')):
        if i == 0:
            df = pd.read_csv('output/' + fname)
        else:
            df = pd.merge(df, pd.read_csv('output/' + fname),
                          how='outer', on='datetime')
    df.to_csv('health_care.csv')


if __name__ == "__main__":

    print("Convert apple health care xml to csv.")

    s = Splitter()

    s.get_body_mass()
    s.get_burned_energy()
    s.get_heart_rate()
    s.get_stand_time()
    s.get_step_count()
    s.get_walking_distance()

    print("Merge all csv.")
    merge_all()

    print("Done.")
Example #36
        return item
    """

    def select_item(self, splitter, user):
        max_item = self.actions[0]
        max_val = np.random.beta(
            max_item.successes + self.alpha,
            max_item.count - max_item.successes + self.beta)
        for item in self.actions[1:]:
            if not (user in splitter.train_set.keys() and item.item in splitter.train_set[user])\
                    or self.follow_back(splitter, item.item, user):
                val = np.random.beta(item.successes + self.alpha,
                                     item.count - item.successes + self.beta)
                if val > max_val:
                    max_item = item
                    max_val = val
        self.removed = 1
        return max_item


if __name__ == "__main__":
    from splitter import Splitter
    from plot import plot_results_graph
    import matplotlib.pyplot as plt

    spl = Splitter("../data/movieLens_binary_mini.dat", separator=" ")
    bandit = UCBBandit(spl, "mini", param=0, criteria="mean")
    print(len(bandit.actions))
    plot_results_graph("mini", "eps")
    plt.show()
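select_item above draws one sample per item from a Beta posterior formed by its success/trial counts plus the (alpha, beta) prior and keeps the argmax, a Thompson-sampling style choice; the per-item draw in isolation (same expression as in the code above):

import numpy as np


def sample_item_score(successes, count, alpha, beta):
    # Beta(successes + alpha, failures + beta), where failures = count - successes
    return np.random.beta(successes + alpha, count - successes + beta)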
Example #37
 def visitTextFile(self, textfile):
     splitter = Splitter(textfile.filePath, len(self.workers))
     file_split_result = splitter.split()
     self.operations[textfile.id] = FilePartition(textfile.id,
                                                  len(self.workers), file_split_result, textfile.filePath)
     self._set_collect_count(textfile)