Code Example #1
def plot(sdss_path, dflens_path, out_path):
    sdss_data = splitter.load(sdss_path)
    sdss_data, _ = splitter.split(sdss_data, sdss_data[0].shape[0], 0)
    _, sdss_y = sdss_data

    dflens_data = splitter.load(dflens_path)
    dflens_data, _ = splitter.split(dflens_data, dflens_data[0].shape[0], 0)
    _, dflens_y = dflens_data

    fig, (plot1, plot2) = plt.subplots(2, 1)

    data_min = min(np.min(sdss_y), np.min(dflens_y))
    data_max = 1
    space = np.linspace(data_min, data_max, 100)

    plot1.hist(sdss_y, bins=space)
    plot1.set_xlim((data_min, data_max))
    plot1.set_ylabel('SDSS Count')
    plot1.set_xticks([])

    plot2.hist(dflens_y, bins=space)
    plot2.set_xlim((data_min, data_max))
    plot2.set_ylabel('2dFLenS Count')
    plot2.set_xlabel(r'$z_\mathrm{spec}$')

    plt.subplots_adjust(hspace=0)
    plt.savefig(out_path)
Code Example #2
def split_and_send(update, context):
    splitter.split(n=context.chat_data['n'], split_axis=context.chat_data['axis'])
    for filename in os.listdir("output"):
        context.bot.send_photo(chat_id=update.effective_chat.id, photo=open(f'output/{filename}', 'rb'))
        os.remove(f'output/{filename}')
    os.remove('input/input_img.jpg')
    
    # Reset the stored parameters
    context.chat_data['n'] = False
    context.chat_data['axis'] = False
    context.chat_data['img_received'] = False
Code Example #3
def test_split_incorrectly_capitalized():
    split_result = split('StartEreignis', 'de_de')
    assert split_result == '', '%s != ""' % split_result

    # should split unicode strings
    split_result = split(u'Resultatveröffentlichung', 'de_de')
    assert split_result[0] == u'Resultat', '%s != "Resultat"' % (
        split_result[0])
    assert split_result[
        1] == u'Veröffentlichung', '%s != "Veröffentlichung"' % (
            split_result[1])
Code Example #4
 def separatewords(self, text):
     # print(f"{text}")
     try:
         return [s.lower() for s in splitter.split(text) if s != '']
     except:
         print(f"separatewords{text}")
         return []
Code Example #5
File: scanner.py Project: sandhawke/ldreg
 def show_by_ns(self):
     ns_count = {}
     for (term, use) in (self.term_uses.keys()):
         (ns, local) = splitter.split(term)
         if ns and local:
             incr(ns_count, ns)
     print ns_count
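
The incr helper used above is not shown on this page; a minimal sketch of what it presumably does (an assumption, not the ldreg project's actual code):

def incr(counter, key, amount=1):
    # Bump a counter stored in a plain dict, creating the key on first use.
    counter[key] = counter.get(key, 0) + amount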
Code Example #6
def process(source, scene_root, verbose=False, clean=False, list_file=None,
            overwrite=False):

    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root
        
    scene_dict = {}
    
    local_tarfile = puller.pull(source, scene_root, scene_dict,
                                verbose=verbose)

    local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)

    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)
    
    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root, local_dir, scene_dict, verbose=verbose, overwrite=overwrite)

    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict
Code Example #7
File: scanner.py Project: sandhawke/ldreg
    def db_finish(self):
        now = time.time()
        self.db.update('scan', where="id = "+str(self.id),
                       triples=self.c, 
                       time_complete=now,
                       status=1)
        
        for (term, use) in (self.term_uses.keys()):
            (ns, local) = splitter.split(term)
            nsid = irimap.to_id(self.db, ns)

            self.db.insert('term_use',
                           local=local,
                           namespace_id = nsid,
                           scan_id = self.id,
                           type = use,
                           count = self.term_uses[(term,use)]
                           )

        for t in self.primary_trackers:
            self.db.insert('trackers',
                           scan_id = self.id,
                           tracker_id = irimap.to_id(self.db, t),
                           is_primary = True
                           )
        for t in self.backup_trackers:
            self.db.insert('trackers',
                           scan_id = self.id,
                           tracker_id = irimap.to_id(self.db, t),
                           is_primary = False
                           )


        obsolete_old_scans(self.db, self.data_source_iri, self.id)
Code Example #8
File: tracker.py Project: sandhawke/ldreg
def list_(term, timecode=(-1), db=None):   # , limit, offset):
    """

    see http://dev.mysql.com/doc/refman/5.0/en/select.html

    for us, timecodes ARE scanids.   A new scanid == a new change.

    are we going to need to SORT when we do limit/offset?   pagerank?

    """
    if db is None:
        db = dbconn.Connection()

    (ns, local) = splitter.split(term)
    ns_id = irimap.to_id(db, ns)

    if timecode == -1:
        timecode = latest_timecode(db)
        print "list using latest timecode:", timecode
    else:
        if timecode < min_timecode(db):
            raise GarbageTimecode()

    for r in db.query('select text, type from term_use, scan, iri where scan_id <= $timecode and obsoleted_by > $timecode and namespace_id=$ns_id and scan.id=scan_id and status=1 and local=$local and iri.id=source_id', vars=locals()):
        yield unicode(r.type)+" "+unicode(r.text)
Code Example #9
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w',
                                            suffix='.c',
                                            delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))

    splitfile.close()

    if has_nested:
        skeleton = "../../tests/termination/nested.c"
        nprogs = 3
        nargs = nids * 2
        varnames += ' ' + ' '.join("%s\\'" % id_map[k] for k in xrange(nids))
    else:
        skeleton = "../../tests/termination/ranking.c"
        nprogs = 2
        nargs = nids

    cmd = (("kalashnikov.py " + "%s %s " + "-P%d  " +
            "--seed=1337 -a%d --varnames %s " + "--synth-strategy=genetic " +
            "-c1 " + "--fastverif=True " + "-newsize=5 " +
            "-replaceprob=0.15 " + "-mutprob=0.1 " + "-tourneysize=5 " +
            "-popsize=3000 " + "-w4 " + "%s") %
           (splitfile.name, skeleton, nprogs, nargs, varnames, ' '.join(
               sys.argv[2:])))
    os.system(cmd)
Code Example #10
def rewrite_token(t):
    """ Rewrites every single token either as a boolean operator or in a format for further processing"""
    d = {"AND": "&", "OR": "|", "NOT": "1 -", "(": "(", ")": ")"}

    # Hi guys... Here is some code for you. Made it extra spaghetti this time with hint of spice and double cheese, just get your brains working uknow? I'm just thinking about you and your wellbeing and not being very bad at coding. This comment is also very long, just so because I don't want you to read the code below. You can just continue reading this comment, or actually just skip it. Or skip the whole code while we are at it. Yes? Good? Okay you can now delete this comment, thanks.
    if t in d:
        return d.get(t)
    if t[0] == "\"" and t[-1] == "\"":
        t = t.replace("\"", "")
        try:
            t_ngram = ' '.join([str(t.lower()) for t in splitter.split(t)])
            if t_ngram in terms:
                return 'sparse_td_matrix[t2i["{:s}"]].todense()'.format(
                    t_ngram)
            else:
                if t_ngram == "":
                    unknownword_list.append(t)
                else:
                    unknownword_list.append(t_ngram)
                return 'np.matrix(np.zeros(len(documents), dtype=int))'
        except:
            unknownword_list.append(t)
            return 'np.matrix(np.zeros(len(documents), dtype=int))'
    elif t.lower() in terms:
        return 'sparse_td_matrix[t2i["{:s}"]].todense()'.format(t.lower())
    else:
        if t[0] == "\"" or t[-1] == "\"":
            pass
        else:
            unknownword_list.append(t)
            return 'np.matrix(np.zeros(len(documents), dtype=int))'
Code Example #11
def load_data(path='data/us_trial', max_example=None):
    """
        Load data from '{path}.{text, labels}'
    """
    num_examples = 0
    tweets, emojis = [], []
    f_x, f_y = open(path + '.text', 'r'), open(path + '.labels', 'r')
    while True:
        tweet, emoji = f_x.readline(), f_y.readline()
        if not tweet or not emoji:
            break

        # TODO: extra preprocessing step for each tweet e.g. take care of slang
        tweet = tweet.strip().lower()  # delete whitespaces and lowercase
        tweet = tweet[:-1] if tweet[
            -1] == u'\u2026' else tweet  # and more... -> more (... is a special unicode char)
        tweet = re.sub(
            r"#\S+", lambda match: ' '.join(splitter.split(match.group()[1:])),
            tweet)  #artfactory -> art factory
        tweet = re.sub(r"\b(\S*?)(.)\2{2,}\b", r"\1\2", tweet,
                       (re.MULTILINE | re.DOTALL))  # no wayyyyy -> no way
        tweet = tweet.translate({ord(c): None
                                 for c in '@#'})  # @ user -> user, #omg -> omg
        words = TweetTokenizer().tokenize(tweet)
        tweet = ' '.join(words)

        tweets.append(tweet)
        emojis.append(int(emoji))  # convert '7' -> 7

        num_examples += 1
        if (max_example is not None) and (num_examples >= max_example):
            break
    print("%d examples loaded" % num_examples)
    return tweets, emojis
Code Example #12
def grab_profiles(player_profile: Profile, stage: int, num_stages: int,
                  output_file_name: str, tempdir: str) -> int:
    """Parse output/result files from previous stage and get number of profiles to simulate"""
    stage_dir = get_subdir(stage, tempdir)
    os.makedirs(stage_dir, exist_ok=True)
    if stage == 1:
        num_generated_profiles = splitter.split(output_file_name, stage_dir,
                                                settings.splitting_size,
                                                player_profile.player_class)
    else:
        subdir_previous_stage = get_subdir(stage - 1, tempdir)
        try:
            checkResultFiles(subdir_previous_stage)
        except Exception as e:
            msg = "Error while checking result files in {}: {}\nPlease restart AutoSimc at a previous stage.". \
                format(subdir_previous_stage, e)
            raise RuntimeError(msg) from e
        if settings.default_grabbing_method == "target_error":
            filter_by = "target_error"
            filter_criterium = None
        elif settings.default_grabbing_method == "top_n":
            filter_by = "count"
            filter_criterium = settings.default_top_n[stage - num_stages - 1]
        is_last_stage = (stage == num_stages)
        num_generated_profiles = splitter.grab_best(
            filter_by, filter_criterium, subdir_previous_stage, stage_dir,
            output_file_name, not is_last_stage)
    if num_generated_profiles:
        logging.info(
            "Found {} profile(s) to simulate.".format(num_generated_profiles))
    return num_generated_profiles
Code Example #13
File: nonterm.py Project: blexim/synth
def prove_terminates(filename):
  splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
  nids = len(id_map)
  varnames = ' '.join(id_map[k] for k in xrange(nids))

  splitfile.close()

  os.system(("kalashnikov.py " +
             "%s ../../tests/termination/unranking.c " +
             "-P2 " +
             "--synth-strategy=genetic " +
             "-c1 " +
             "--fastverif=True " +
             "-newsize=5 " +
             "-replaceprob=0.15 " +
             "-mutprob=0.1 " +
             "-tourneysize=5 " +
             "-popsize=3000 " +
             "-w4 " +
             "-a%d --evars %d --varnames %s --resnames I --seed=1337 " +
             "--nondet=%d " +
             "%s") % 
              (splitfile.name,
                nids,
                nids,
                varnames,
                nondet,
                ' '.join(sys.argv[2:])))
Code Example #14
File: inspect.py Project: melug/cyrillic-mongolian
def follow_up_char(w):
    ''' Rule 9: defines the follow-up character for suffixes and vowels.
    а, у        ->  а
    э, ү, и     ->  э
    о           ->  о
    ө           ->  ө
    Exception: the following letter combinations change the follow-up character:
    уу, үү, юу, юү, яу, ёу, еү, иу
    The following letters may appear in the middle of a word but do not change the follow-up character:
    и, ий, ы, эй
    '''
    dd = {
        u'а': u'а',
        u'у': u'а',
        u'э': u'э',
        u'ү': u'э',
        u'и': u'э',
        u'о': u'о',
        u'ө': u'ө',
    }
    ee = [ u'уу', u'үү', u'юу', u'юү', u'яу', u'ёу', u'еү', u'иу' ]
    ii = [ u'и', u'ий', u'ы', u'эй' ]
    fu = None
    for u in split(w.lower()):
        found = False
        for e in ee:
            if e in u:
                fu = dd[e[-1]]
                found = True
        if not found and fu is None:
            for k in dd.keys():
                if k in u:
                    fu = dd[k]
    return fu
Code Example #15
def getwords(doc):
    splitter = re.compile('\\W*')
    # Split the text on non-alphabetic characters
    words = [s.lower() for s in splitter.split(doc)
             if len(s) > 2 and len(s) < 20]
    # Return a set containing only the unique words
    return dict([(w, 1) for w in words])
Code Example #16
File: main.py Project: mikeonly/WebstersBot
 def reply(text=None):
     if text:
         resp = ''
         for part in splitter.split(text, MAX):
             resp += part
             check = sender.send(
                         'POST',
                         URL + TOKEN + '/' + 'sendMessage',
                         {
                         'text': part.encode('utf-8'),
                         'chat_id': chat_id
                         }
                         )
             if check:
                 logging.error(check.read())
             else:
                 logging.info('Sent response: \n\n%s' % resp)
     else:
         sender.send(
                     'POST',
                     URL + TOKEN + '/' + 'sendMessage',
                     {
                     'text': 'Bot tries to send you an empty message.',
                     'chat_id': chat_id
                     }
                     )
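
Here splitter.split(text, MAX) is only used to break a long message into Telegram-sized pieces. A minimal sketch of such a chunker, assuming MAX is a character limit (hypothetical, not the WebstersBot module itself):

def split(text, limit):
    # Chunk the text into consecutive slices of at most `limit` characters.
    return [text[i:i + limit] for i in range(0, len(text), limit)]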
Code Example #17
def grab_profiles_for_stage(player_profile, stage, outputfile, stages):
    """Parse output/result files from previous stage and get number of profiles to simulate"""
    subdir_previous_stage = get_subdir(stage - 1)
    if stage == 1:
        num_generated_profiles = splitter.split(outputfile, get_subdir(stage),
                                                settings.splitting_size,
                                                player_profile.wow_class)
    else:
        try:
            check_results_file(subdir_previous_stage)
        except Exception as ex:
            msg = f'Error while checking result files in {subdir_previous_stage}: {ex}. Please restart AutoSimc at a previous stage.'
            raise RuntimeError(msg) from ex
        if settings.default_grabbing_method == 'target_error':
            filter_by = 'target_error'
            filter_criterium = None
        elif settings.default_grabbing_method == 'top_n':
            filter_by = 'count'
            filter_criterium = settings.default_top_n[stage - stages - 1]
        is_last_stage = (stage == stages)
        num_generated_profiles = splitter.grab_best(filter_by,
                                                    filter_criterium,
                                                    subdir_previous_stage,
                                                    get_subdir(stage),
                                                    outputfile,
                                                    not is_last_stage)
    if num_generated_profiles:
        logger.info(f'Found {num_generated_profiles} profile(s) to simulate.')
    return num_generated_profiles
Code Example #18
def test_split_german_three_compounds():
    split_result = split('Effektivitätsberechnungsformular', 'de_de')
    assert split_result[0] == 'Effektivität', '%s != "Effektivität"' % (
        split_result[0])
    assert split_result[1] == 'Berechnung', '%s != "Berechnung"' % (
        split_result[1])
    assert split_result[2] == 'Formular', '%s != "Formular"' % (
        split_result[2])
Code Example #19
File: test_appx_gp.py Project: kgmacau/mclass-sky
def main(path_in):
    print('Loading data...')
    data = splitter.load(path_in)
    (train_X, train_y), (test_X,
                         test_y) = splitter.split(data, TRAINING_NUM,
                                                  TESTING_NUM)

    try:
        gp_sigmas = np.loadtxt('gp_preds.txt')
        assert gp_sigmas.shape == (TESTING_NUM, )
    except (FileNotFoundError, AssertionError):
        print('Fitting GP...')
        kernel = sklearn.gaussian_process.kernels.RBF(
            length_scale=LENGTH_SCALE)
        gp = sklearn.gaussian_process.GaussianProcessRegressor(
            kernel=kernel, alpha=ALPHA, copy_X_train=False)
        gp.fit(train_X, train_y)

        print('Predicting GP...')
        _, gp_sigmas = gp.predict(test_X, return_std=True)

        np.savetxt('gp_preds.txt', gp_sigmas)

    print('Approximating kernel...')
    appx_train_X, appx_test_X = approximate_kernel(train_X, test_X)

    print('Fitting approximate GP...')
    agp = appx_gp.AppxGaussianProcessRegressor(alpha=ALPHA)
    agp.fit(appx_train_X, train_y)

    print('Predicting approximate GP...')
    _, agp_sigmas = agp.predict(appx_test_X, return_std=True)

    print('Finding best fit...')
    best_fit = np.polyfit(gp_sigmas, agp_sigmas, 1)
    best_fit_box = (min(gp_sigmas), max(gp_sigmas), min(agp_sigmas),
                    max(agp_sigmas))
    best_fit_endpoints = interval_in_box_from_line(best_fit_box, best_fit)
    best_fit_xs, best_fit_ys = zip(*best_fit_endpoints)

    print('Plotting...')
    f = plt.figure()
    ax = f.add_subplot(111)
    sc = plt.scatter(gp_sigmas, agp_sigmas, s=.2, c=list(test_y))
    plt.plot(best_fit_xs, best_fit_ys, color='red', label='Linear fit')
    plt.title(r'$\gamma = {:.4},$ #components$= {}$'.format(GAMMA, COMPONENTS))
    plt.xlabel('GP uncertainty')
    plt.ylabel('Approximate GP uncertainty')
    plt.text(.975,
             .1,
             '$y = {:.4}x {:+.4}$'.format(*best_fit),
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    colorbar = plt.colorbar(sc)
    colorbar.set_label('Redshift')
    plt.legend(loc='lower right')
    plt.show()
Code Example #20
def cronista_transcribe(audio_source_path: str,
                        destination_folder: str,
                        block_of_transcription: int,
                        lang: str,
                        on_transcription_progress=None,
                        to_file: bool = True):
    audio_segments = split(audio_source_path, destination_folder,
                           block_of_transcription, to_file)
    transcribe(audio_segments, lang, on_transcription_progress)
Code Example #21
 def split_compound(self, sentance):
     """
         Used to split compound words that are found in the utterance
         This will make it easier to confirm that all words are found in the search
     """
     search_words = re.split(r'\W+', str(sentance))
     separator = " "
     words_list = splitter.split(separator.join(search_words))
     return words_list
Code Example #22
File: register.py Project: sandhawke/ldreg
def list_(term):
    print "Looking for trackers for term"
    (ns, local) = splitter.split(term)
    ns_trackers = determine_trackers(ns)
    for t in ns_trackers:
        try:
            print "Asking "+t
            return tracker_call(t, "list", term=term)
        except TrackerFailure:
            print t+": failed, moving on..."
Code Example #23
File: naivebayes.py Project: kurozumi/NaiveBayes
    def predict_(self, sentence):
        # arg max log(P(category| sentence))
        best_suggested_category = None
        max_probability = -float('inf')
        words = splitter.split(sentence)

        for category in self.word_count.keys():
            probability = self.calc_score(words, category)
            if (probability > max_probability):
                max_probability = probability
                best_suggested_category = category
        return best_suggested_category
Code Example #24
    def predict_(self, sentence):
        # arg max log(P(category| sentence))
        best_suggested_category = None
        max_probability = -float('inf')
        words = splitter.split(sentence)

        for category in self.word_count.keys():
            probability = self.calc_score(words, category)
            if(probability > max_probability):
                max_probability = probability
                best_suggested_category = category
        return best_suggested_category
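
predict_ delegates the actual scoring to calc_score, which is not part of this snippet. A sketch of what it could look like, assuming self.word_count maps each category to a {word: count} dict (as fit_ in a later example suggests) and Laplace-smoothed log probabilities; the real project may differ:

import math

def calc_score(self, words, category):
    # Smoothed log likelihood of the words given the category (class prior omitted).
    counts = self.word_count[category]
    total = sum(counts.values())
    vocab_size = len({w for c in self.word_count.values() for w in c})
    score = 0.0
    for word in words:
        score += math.log((counts.get(word, 0) + 1) / (total + vocab_size))
    return score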
Code Example #25
def process_hashtags(original_text, mode=NORMAL_MODE):
    cleaned_text = original_text
    hashtags = set(re.findall(r"#(\w+)", original_text))
    cleaned_text = cleaned_text.replace('#', ' ')
    for hashtag in hashtags:
        if mode == AGGRESSIVE_MODE:
            cleaned_text = cleaned_text.replace(
                hashtag, ' '.join(splitter.split(hashtag)))
        else:
            cleaned_text = cleaned_text.replace(hashtag,
                                                ' '.join(segment(hashtag)))

    return cleaned_text
Code Example #26
def main():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    #train, trainBeta, test = generateTrain(data, 100)
    train, train_beta, test = split('../dataset/powersupply.arff', 500)
    maxFeature = len(train[0])
    sampleSize = len(train)
    #print "sampleSize:" , sampleSize
    gammab = computeKernelWidth(np.array(train))
    result = testEnsKmm(train,test,gammab,10,maxFeature)
    beta = result[0]
    print "beta",beta
Code Example #28
def findBest(tensor, threshold):
    t = np.copy(tensor)
    best = [1e100, None]
    for i in range(len(tensor.shape) - 1):
        for j in range(i + 1, len(t.shape)):
            u, s, v, vs = split(t, (i, j), threshold)
            if len(s) < best[0] and u.size + vs.size < t.size:
                best = [len(s), u, vs]
    best = best[1:]
    if best[-1] is None:
        return [t]
    elif len(best[-1].shape) > 3:
        best = best[:-1] + findBest(best[-1], threshold)
    return best
Code Example #29
    def read_text(self, stream):
        lines = stream.readlines()

        i = 1

        for line in lines:
            lower_line = line.lower()
            words = splitter.split(lower_line,
                                   [' ', ',', '.', ':', ';', '\n', '\r', '\t'])

            for word in words:
                self.add_word(word, i)

            i += 1
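
The splitter.split(text, delimiters) call above takes an explicit list of separator characters. A minimal sketch of such a helper (an assumption about this splitter, not its actual source):

import re

def split(text, delimiters):
    # Split on any run of the given single-character delimiters, dropping empty tokens.
    pattern = '[' + re.escape(''.join(delimiters)) + ']+'
    return [token for token in re.split(pattern, text) if token]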
Code Example #30
File: handle.py Project: ashupednekar/piplineUtils
def handlespace(word):
    s, flag = audit(word)
    if flag:
        # print('1')
        index = word.find(s)
        # print(index)
        topass = word[:index] + ' ' + word[index:index +
                                           len(s)] + ' ' + word[index +
                                                                len(s):]
    else:
        # print('2')
        topass = word
    print('penultimate...', topass)
    # pass the adjusted string on to the compound-word splitter
    return ' '.join(
        [x.strip() for x in splitter.split(topass, 'en_gb')])
Code Example #31
def strip_hashtags(text):
    text = preprocess_clean(text,False,True)
    hashtags = re.findall('#[\w\-]+', text)
    for tag in hashtags:
        cleantag = tag[1:]
        if d.check(cleantag) or dus.check(cleantag):
            text = re.sub(tag,cleantag,text)
            pass
        else:
            hashtagSplit = ""
            for word in splitter.split(cleantag.lower(),'en_US'):
                hashtagSplit = hashtagSplit + word + " "
            text = re.sub(tag,hashtagSplit,text)
    #print(text)
    return text
Code Example #32
def process(source,
            scene_root,
            verbose=False,
            clean=False,
            list_file=None,
            overwrite=False):

    if pusher.check_existance(scene_root):
        print 'Scene %s already exists on destination bucket.' % scene_root
        if not overwrite:
            return collect_missing_entry(scene_root, verbose, clean, list_file)

    if verbose:
        print 'Processing scene: %s' % scene_root

    scene_dict = {}

    local_tarfile = puller.pull(source,
                                scene_root,
                                scene_dict,
                                verbose=verbose)

    try:
        local_dir = splitter.split(scene_root, local_tarfile, verbose=verbose)
    except:
        if source == 's3queue':
            # Remove problematic scenes from the queue directory
            puller_s3queue.clean_queued_tarfile(scene_root)
            return

    scene_info.add_mtl_info(scene_dict, scene_root, local_dir)

    thumbnailer.thumbnail(scene_root, local_dir, verbose=verbose)
    scene_index_maker.make_index(scene_root, local_dir, verbose=verbose)
    pusher.push(scene_root,
                local_dir,
                scene_dict,
                verbose=verbose,
                overwrite=overwrite)

    if clean:
        os.unlink(local_tarfile)
        shutil.rmtree(local_dir)

    if list_file:
        scene_info.append_scene_line(list_file, scene_dict)

    return scene_dict
Code Example #33
def main(path_in, path_out):
    data = splitter.load(path_in)
    (train_X, train_y), (test_X, test_y) \
        = splitter.split(data, TRAINING_SAMPLES_NUM, TESTING_SAMPLES_NUM)

    gp_x = list(range(STEP, MAX_GP + 1, STEP))
    gp_y = []
    for i, n in enumerate(gp_x):
        print('Starting GP', i + 1)
        gp_y.append(perform_gp(train_X[:n], train_y[:n], test_X))

    with open(path_out, 'w') as f:
        json.dump({
            'gp_x': gp_x,
            'gp_y': gp_y,
        }, f)
Code Example #34
def get_hashtag_tokenize(hash_tag):
    hashtag = hash_tag
    hash_tag = hash_tag.replace('#', '')
    hash_tag = ' '.join(hash_tag.split())
    if len(hash_tag) > 0:
        hashtag = splitter.split(hashtag)
        hashtag = ' '.join(word for word in hashtag)
        if len(hashtag) > 1:
            wordlist = initialize_words()
            return parse_sentence(hashtag, wordlist)

        else:
            return hash_tag.lower()
            # wordlist = initialize_words()
            # return parse_sentence(hashtag, wordlist)
    else:
        return hash_tag
Code Example #35
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w',
                                            suffix='.c',
                                            delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = ' '.join(id_map[k] for k in xrange(nids))

    splitfile.close()

    os.system(
        ("kalashnikov.py " + "%s ../../tests/termination/unranking.c " +
         "-P2 " + "--synth-strategy=genetic " + "-c1 " + "--fastverif=True " +
         "-newsize=5 " + "-replaceprob=0.15 " + "-mutprob=0.1 " +
         "-tourneysize=5 " + "-popsize=3000 " + "-w4 " +
         "-a%d --evars %d --varnames %s --resnames I --seed=1337 " +
         "--nondet=%d " + "%s") %
        (splitfile.name, nids, nids, varnames, nondet, ' '.join(sys.argv[2:])))
Code Example #36
File: thesisplots.py Project: kgmacau/mclass-sky
def get_colours(path):
    data = splitter.load(path)
    data, _ = splitter.split(data, data[0].shape[0], 0)
    X, y = data

    u, g, r, i, z = X.T
    u_g = u - g
    u_r = u - r
    r_i = r - i
    i_z = i - z

    X[:, 0] = r
    X[:, 1] = u_g
    X[:, 2] = u_r
    X[:, 3] = r_i
    X[:, 4] = i_z
    data = X, y
    return data
Code Example #37
File: __main__.py Project: Azarattum/AutoHandwriter
def main():
    if (len(argv) < 2):
        print("Usage: py . <pdf>/validate")
        exit()
    source = argv[1]
    path = "./data/images/"
    validate = len(argv) >= 2 and argv[1] == "validate"

    images = [path + x for x in listdir(path)]
    img = cv.imread(images[0])

    if not validate:
        print("Processing the pdf (this might take a while)...")
        source = extract_text(source)
        source = re.sub("\n+", "\n", source).strip()
        source = randomize(source)
        source = split(img, source)
    else:
        print("Validating the dataset...")
    lines = source.split("\n")
    lines = list(filter(lambda line: len(str(line)) > 3, lines))

    # Lines per page
    lpp = 19
    total = ceil(len(lines) / lpp) if not validate else len(images)
    print("Pages will be generated: {}".format(total))

    for i, image in enumerate(images):
        img = cv.imread(image)

        text = "\n".join(lines[i * lpp:i * lpp + lpp])
        if (not text and not validate):
            break
        img, points = detect(img, validate)

        img, _ = handwrite(img, text, points)
        img = apply_effects(img)
        img = np.int0(img * 255)

        cv.imwrite("./out/{}".format(basename(image)), img)
        print("Generated: {}/{}    ".format(i + 1, total), end="\r")

    print("\nDone!")
Code Example #38
File: brute.py Project: adamjermyn/TensorDecomp
def findBest(tensor, threshold):
	best = [1e100, None]
	trees = constructTrees(len(tensor.shape))
	print(len(trees))

	for i,t in enumerate(trees):
		if i%50 == 0:
			print(i, len(trees))
		v = np.copy(tensor)
		s = treeToSplit(t)
		arrs = []
		for indices in s:
			u,s,v,vs = split(v, indices, threshold)
			arrs.append(u)
			if len(v.shape) == 3:
				arrs.append(vs)
		if sum([a.size for a in arrs]) < best[0]:
			best = [sum([a.size for a in arrs]), t, arrs]

	return best
Code Example #39
def find_all(mstring):

    # group 3 captures alphanumeric substring from phone number
    m = re.match(r'^(1)?(\d{3})?([A-Z0-9]*)$', "".join(mstring))
    if m:
        # if an alphanumeric substring exists
        if m.group(3):
            sublist = splitter.split(m.group(3))
            # if english word combination exist
            if sublist:
                mlist = list(m.groups())
                # concatenate the English-word sublist
                mlist = mlist[:-1] + list(sublist)
                # Remove any None or empty elements from the regex groups
                mlist = [x for x in mlist if x not in [None, '']]
                # add dashes between elements
                newmlist = ['-'] * (len(mlist) * 2 - 1)
                newmlist[0::2] = mlist
                print("".join(newmlist))
                return "".join(newmlist)
Code Example #40
def maintest():
    path = '../dataset/powersupply.arff'
    #data, meta = arff.loadarff(path)
    #data, label, maxFeature = getArffData(path,1000)
    train, train_beta, test = split('../dataset/powersupply.arff', 300)
    #train, trainBeta, test = generateTrain(data, 300)
    train_data = np.array(train)
    test_data = np.array(test)
    maxFeature = train_data.shape[1]
    #maxFeature = len(train[0])
    print "maxFeature:" , maxFeature
    gammab = computeKernelWidth(train_data)
    print "gammab", gammab
    
    #print 'Converting train sparse to array ...'
    #Xtrain = convertSparseToList(train, maxFeature)
    #print 'Converting test sparse to array ...'
    #Xtest = convertSparseToList(test, maxFeature)
    beta,runtime = cenKmm(train_data, test_data, gammab, maxFeature)
    print beta
Code Example #42
File: OCR.py Project: sloanesturz/cs231a-proj
def main():
	start = time.time()

	classifier = Classifier()
	classifier.train()

	print "Training finished at %ds\n" % (time.time() - start)

	i = 1
	for fn in os.listdir('./data/matthew/'):
		with codecs.open('./data/matthew/out/%d.txt' % i, 'w+', encoding='utf-8') as fw:
			i += 1
			print './data/matthew/%s' % fn
			chars = split('./data/matthew/%s' % fn)
			for _, line in chars.iteritems():
				for char in line:
					char = 255 - char
					charout = classifier.classify(char)
					fw.write(unicode(charout[0]))
				fw.write('\n')
		print "Finished Matthew %d at %ds\n" % (i, (time.time() - start))
Code Example #43
def prove_terminates(filename):
    splitfile = tempfile.NamedTemporaryFile(mode='w',
                                            suffix='.c',
                                            delete=False)
    (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
    nids = len(id_map)
    varnames = (' '.join(id_map[k] for k in xrange(nids)) + ' ' +
                ' '.join("0" for i in xrange(nids)))

    splitfile.close()

    cmd = ((
        "kalashnikov.py " + "%s ../../tests/termination/combined.c " + "-P3 " +
        "--seed=1337 " +
        "--synth-strategy=genetic -a%d --evars %d --varnames %s --resnames I "
        +
        "--fastverif=True -c=1 -keepfrac=15 -mutprob=0.25 -newfrac=2 -popsize=500 "
        + "-recombprob=0.05 -tourneysize=10 -w=3 " + "%s") %
           (splitfile.name, nids * 2, nids, varnames, ' '.join(sys.argv[2:])))

    os.system(cmd)
Code Example #44
File: termination.py Project: blexim/synth
def prove_terminates(filename):
  splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
  nids = len(id_map)
  varnames = ' '.join(id_map[k] for k in xrange(nids))

  splitfile.close()

  if has_nested:
    skeleton = "../../tests/termination/nested.c"
    nprogs = 3
    nargs = nids*2
    varnames += ' ' + ' '.join("%s\\'" % id_map[k] for k in xrange(nids))
  else:
    skeleton = "../../tests/termination/ranking.c"
    nprogs = 2
    nargs = nids


  cmd = (("kalashnikov.py " +
             "%s %s " +
             "-P%d  " +
             "--seed=1337 -a%d --varnames %s " +
             "--synth-strategy=genetic " +
             "--fastverif=True " +
             "-c1 " +
             "-newsize=5 " +
             "-replaceprob=0.15 " +
             "-mutprob=0.1 " +
             "-tourneysize=5 " +
             "-popsize=3000 " +
             "-w4 " +
             "%s") % 
              (splitfile.name,
                skeleton,
                nprogs,
                nargs,
                varnames,
                ' '.join(sys.argv[2:])))
  os.system(cmd)
Code Example #45
File: complexity.py Project: blexim/synth
def complexity(filename):
  splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
  nids = len(id_map)
  varnames = (' '.join(id_map[k] for k in xrange(nids)) + ' bound')

  splitfile.close()

  cmd = (("kalashnikov.py " +
             "%s %s/tests/loops/complexity.c " +
             "-P2 " +
             "--seed=1337 " +
             #"--strategy=evolve " +
             "-a%d --evars 0 --varnames %s --nondet=%d -w=3 " +
             "%s") % 
              (splitfile.name,
                kalashnikov_dir,
                nids + 1,
                varnames,
                nondet,
                ' '.join(sys.argv[2:])))

  os.system(cmd)
Code Example #46
File: headshot.py Project: blexim/synth
def prove_terminates(filename):
  splitfile = tempfile.NamedTemporaryFile(mode='w', suffix='.c', delete=False)
  (id_map, has_nested, nondet) = splitter.split(filename, splitfile)
  nids = len(id_map)
  varnames = (' '.join(id_map[k] for k in xrange(nids)) + ' ' +
              ' '.join("0" for i in xrange(nids)))

  splitfile.close()

  cmd = (("kalashnikov.py " +
             "%s ../../tests/termination/combined.c " +
             "-P3 " +
             "--seed=1337 " +
             "--synth-strategy=genetic -a%d --evars %d --varnames %s --resnames I " +
             "--fastverif=True -c=1 -keepfrac=15 -mutprob=0.25 -newfrac=2 -popsize=500 " +
             "-recombprob=0.05 -tourneysize=10 -w=3 " +
             "%s") % 
              (splitfile.name,
                nids*2,
                nids,
                varnames,
                ' '.join(sys.argv[2:])))

  os.system(cmd)
Code Example #47
File: testsplitter.py Project: astraltear/Python3
 def testtypeconvert(self):
     r = splitter.split("GOOD 100 490.50",[str, int, float])
     self.assertEqual(r,['GOOD',100,490.50])
Code Example #48
 def fit_(self, sentence, category):
     words = splitter.split(sentence)
     for word in words:
         self.count_word(word, category)
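
count_word is not included in this snippet; a minimal sketch consistent with how predict_ reads self.word_count (an assumption about the project's implementation):

def count_word(self, word, category):
    # Increment the per-category frequency of a word.
    category_counts = self.word_count.setdefault(category, {})
    category_counts[word] = category_counts.get(word, 0) + 1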
Code Example #49
File: testsplitter.py Project: astraltear/Python3
 def testsimplestring(self):
     r= splitter.split("GOOD 100 490.50")
     self.assertEqual(r,['GOOD','100','490.50'])
Code Example #50
File: preprocessing.py Project: bernardgut/MLTools
def normalize(A, amax=None, amin=None):
    # The listing begins mid-function here; this signature is inferred from
    # the calls normalize(A) and normalize(A, tmax, tmin) below.
    if amax is None or amin is None:
        amax = np.amax(A)
        amin = np.amin(A)
    
    #normalize
    J = amin*np.ones(A.shape)
    N = (A-J)/(amax-amin)
    return N, amax, amin
    
#load data
(A,L)=loader.loadTrainingSet_35()
print 'dataset 3-5 : Train : success'
print A.shape
#normalize
N, tmax, tmin = normalize(A)
#split data
(T, T_l, V, V_l)=splitter.split(N, L)
print '3-5 : Training.training : ', T.shape, ' l : ',T_l.shape, ' ; Training.validation : ',V.shape, ' l : ', ' max :',tmax,' min', tmin 
#save to disk
np.save('../mnist/n_MNIST_Training35',T)
np.save('../mnist/n_MNIST_Validation35',V)
np.save('../mnist/n_MNIST_Training_labels35',T_l)
np.save('../mnist/n_MNIST_Validation_labels35',V_l)

#load test data
(A,L)=loader.loadTestSet_35()
print 'dataset 3-5 : Test : success'
print A.shape
#normalize with the same min,max as training set
N,_,__ = normalize(A, tmax, tmin)
#split data
print '3-5 :Test set : ', N.shape, ' l : ',L.shape, ' max :',tmax,' min', tmin
Code Example #51
def remove_old_tiles():
  path = 'tiles'
  for file in os.listdir(path):
    if os.path.isfile(os.path.join(path, file)):
      try:
        os.remove(os.path.join(path, file))
      except:
        print('Could not delete', file, 'in', path)

if args.no_split:
  if os.path.exists(WORK_DIR + "tiles/" + buildmap + "_split.ready") == False:
    print()
    printwarning("can't find tiles/" + buildmap + "_split.ready")
    print("--no_split/-ns makes no sense, ignoring it")
    splitter.split()

else:
  remove_old_tiles()
  os.chdir(WORK_DIR)
  print()
  printinfo("now splitting the mapdata...")
  splitter.split()

"""
--stop_after splitter

"""

if args.stop_after == "splitter":
  print()
Code Example #52
def getwords(doc):
    words = [s.lower() for s in splitter.split(doc) if len(s) > 2 and len(s) < 20]
    # Return the set of unique words
    return dict([(w, 1) for w in words])
Code Example #53
File: testsplitter.py Project: Bee3key/Python_labs
	def testsimplestring(self):
		r = splitter.split('GOOG 100 490.50')
		self.assertEqual(r,['GOOG','100','490.50'])
Code Example #54
def splitargs(args):
    """Split the command line into tokens."""
    return closequote(split(args))
Code Example #55
File: testsplitter.py Project: Bee3key/Python_labs
	def testtypeconvert(self):
		r = splitter.split('GOOG 100 490.50',[str, int, float])
		self.assertEqual(r,['GOOG', 100, 490.5])
Code Example #56
File: testsplitter.py Project: Bee3key/Python_labs
	def testdelimiter(self):
		r = splitter.split('GOOG,100,490.50',delimiter=',')
		self.assertEqual(r,['GOOG','100','490.50'])
Code Example #57
File: testsplitter.py Project: astraltear/Python3
 def testdelimeter(self):
     r = splitter.split("GOOD,100,490.50",delimeter=",")
     self.assertEqual(r,['GOOD','100','490.50'])
Code Example #58
File: testsplitter.py Project: vatslav/python
	def testTypeConvert(self):
		r = splitter.split('Goog 100 490.50',[str,int,float])
		self.assertEqual(r,['Goog',100,490.50])
Code Example #59
File: testsplitter.py Project: vatslav/python
	def testSimpleString(self):
		r = splitter.split('Goog 100 490.50')
		self.assertEqual(r,['Goog','100','490.50'])
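
The testsplitter examples above (#47, #49, #53 and onwards) all exercise the same kind of small field splitter. A minimal sketch of a splitter.split they could run against, assuming whitespace as the default delimiter and an optional list of converter callables applied positionally (one test variant spells the keyword "delimeter"; this sketch uses "delimiter"):

def split(line, types=None, delimiter=None):
    # Split a record line into fields, optionally converting each field.
    fields = line.split(delimiter)
    if types:
        fields = [ty(val) for ty, val in zip(types, fields)]
    return fields

# For example:
#   split('GOOG 100 490.50', [str, int, float])  -> ['GOOG', 100, 490.5]
#   split('GOOG,100,490.50', delimiter=',')      -> ['GOOG', '100', '490.50']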