Example #1
def calculate_redundancy(question, questions):
    f = 0.0
    for i in questions:
        f = max(match(convert(question['body']), convert(i['body'])), f)
    return [f]
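A hypothetical call to calculate_redundancy, assuming convert() normalizes a question body and match() returns a similarity score; the question dicts below are made up for illustration:

new_question = {'body': 'How do I reverse a list in Python?'}
existing_questions = [{'body': 'Reversing a list in Python'},
                      {'body': 'How to sort a dictionary by value?'}]
redundancy = calculate_redundancy(new_question, existing_questions)
# redundancy is a one-element list holding the best match score found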
def runner(formula1, formula2):
    formula1 = forseti.parser.parse(formula1)
    formula2 = forseti.parser.parse(formula2)

    statement1, steps1 = util.convert(deepcopy(formula1))
    statement2, steps2 = util.convert(deepcopy(formula2))

    return statement1 == statement2, [statement1, steps1], [statement2, steps2]
Example #3
def runner(formula1, formula2, adjacency):
    formula1 = forseti.parser.parse(formula1)
    formula2 = forseti.parser.parse(formula2)
    print(type(formula1))

    statement1, steps1, isContra1 = util.convert(deepcopy(formula1), adjacency)
    statement2, steps2, isContra2 = util.convert(deepcopy(formula2), adjacency)

    return (statement1 == statement2,
            [statement1, steps1, isContra1],
            [statement2, steps2, isContra2],
            formula1)
    def render(self, scr):
        x, y = util.convert(self.row, self.col)

        l0, h0, l1, h1 = self.bounds

        self.bounds.topleft = [x - l1 / 2, y - h1 / 2]
        scr.blit(self.image, self.bounds)
Example #5
def posts():
    " Displays a list of published posts "
    posts = app.db.session.query(Post).filter(Post.published)\
                                      .order_by(Post.published_dt.desc())
    page = convert(request.args.get('page'), int, 1)
    paginated_posts = posts.paginate(page=page, per_page=8)

    return render_template('/posts/index.tmpl', posts=paginated_posts)
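In this view, convert(request.args.get('page'), int, 1) appears to act as a safe cast with a default; a minimal hypothetical equivalent (not the project's actual helper) might look like this:

def safe_cast(value, to_type, default):
    # Hypothetical stand-in for convert(value, type, default): attempt the
    # cast and fall back to the default on None or malformed input.
    try:
        return to_type(value)
    except (TypeError, ValueError):
        return default

# safe_cast('3', int, 1) -> 3; safe_cast(None, int, 1) -> 1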
def augment_data(data_dir: str,
                 specs_dir: str,
                 file_format: str = 'wav',
                 file_names_path: str = None,
                 wav_dir: str = None):
    """
    Augment data and generate spectrogram of audio files.

    :param data_dir: source directory of audio files
    :param specs_dir: output directory
    :param file_format: format of source audio files (default=wav)
    :param file_names_path: path to a file listing the file names to process.
        If no file is provided, all files in the source directory are
        processed (default=None)
    :param wav_dir: output directory for audio files converted to wav format.
        If no path is provided, a temporary wav file is generated and removed
        after each file is processed (default=None)
    """
    if file_names_path is None:
        file_names = os.listdir(data_dir)
    else:
        with open(file_names_path, 'r') as names_file:
            file_names = names_file.readlines()
    for i, line in enumerate(file_names):
        file_path = line.split(',')[0].strip()
        file_name = os.path.splitext(file_path)[0]
        if wav_dir is None:
            wav_file_path = 'tmp.wav'
        else:
            wav_file_path = os.path.join(wav_dir, file_name + '.wav')

        try:
            if file_format != 'wav':
                convert(file_path, wav_file_path)

            # augment_wav_data(wav_file_path, specs_dir, file_name)
            # TODO
        except RuntimeError:
            print('WARNING: file {} not converted, skipping'.format(file_path))

        if wav_dir is None and os.path.exists(wav_file_path):
            os.remove(wav_file_path)
        print("processed %d files" % (i + 1))
Example #7
    def details():
        if 'user_id' not in session:
            return redirect('/login')

        user, usernames = model.details(session['user_id'])

        return {
            'user': convert(user),
            'usernames': usernames
        }
def parse(str):
    ret_values = []
    default = None
    
    step_type = '+'
    step = 1
    
    if str.startswith('['):
        # split the list of values from the default value (if present)
        at_idx = str.find('@')
        (val, def_val) = (str, None) if at_idx == -1 else (str[:at_idx], str[at_idx+1:]) 
        ret_values = [x.strip() for x in val[val.index('[')+1:val.index(']')].split(',')]
        if def_val:
            default = def_val.strip()
    elif str.startswith('('):
        # this is a stride operator
        at_idx = str.find('@')
        (val, def_val) = (str, None) if at_idx == -1 else (str[:at_idx], str[at_idx+1:])
        p = re.compile(r'\w+')
        v = p.findall(val)
        start = int(v[0])
        end = int(v[1])
        if len(v) == 3:
            p = re.compile(r'\*\w+')
            if p.findall(val):
                step_type = '*'
            step = int(v[2])

        # Create values
        curr = start
        while curr <= end:
            ret_values.append(curr)
            curr = curr * step if step_type == '*' else curr + step
        if def_val:
            default = def_val.strip()
    else:
        # this is a single value variable
        if str.startswith("s\""):
            str = str.replace('\\\n','')[2:-1] # remove initial 's"' and final '"'
        ret_values.append(str)
            
    # try to convert the values into float/int
    return ( list(map(lambda x: convert(x), ret_values)), convert(default) )
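Illustrative inputs for parse() covering the list and stride branches, assuming convert() coerces numeric strings and passes other values through (expected results are shown as comments and are indicative only):

print(parse('[a, b, c] @ b'))    # list with default    -> (['a', 'b', 'c'], 'b')
print(parse('(0, 4) @ 2'))       # additive stride      -> ([0, 1, 2, 3, 4], 2)
print(parse('(1, 9, *2) @ 4'))   # multiplicative step  -> ([1, 2, 4, 8], 4)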
Example #9
def post_audio():
    mp3_file = flask.request.files['file']
    result = client.asr(util.mp3_to_pcm(mp3_file), 'wav', 16000, {
        'lan': 'zh',
    })
    asr_response = json.dumps(result, ensure_ascii=False)
    if 'result' in result:
        try:
            result['result'] = util.convert(result['result'][0])
            app.logger.info("first -" + asr_response + "second - [" + result['result'] + "]")
        except Exception as e:
            app.logger.error("first -" + asr_response + "second -[" + str(e) + "]")
            result['result'] = ''
Example #10
def reservations():
    result = []
    for reservation in many(Reservation):
        if validate(reservation):
            if reservation.occupied is False:
                spot = one(Spot, reservation.spot_id)
                reservation_dict = convert(reservation)
                reservation_dict['spot_location'] = spot.location
                result.append(reservation_dict)
        else:
            abort(reservation)

    return result
Example #11
    def from_scryfall(cls, data):
        printing_col_names = [c.name for c in Printing.__table__.columns]
        printing_data = util.convert(data, {'set': 'set_code'})
        try:
            if 'image_uris' in printing_data:
                printing_data['image_uri'] = printing_data['image_uris'][
                    'normal']
            else:
                printing_data['image_uri'] = None
        except KeyError:
            print(printing_data)
            raise
        printing_data = util.restriction(data, printing_col_names)
        printing_data['card'] = Card.from_scryfall(data)
        return Printing(**printing_data)
def save_label_txt(img_shape, img_label, save_file):
    """
    Convert label information to YOLO format and save it.
    :param img_shape: image shape (height, width, channels)
    :param img_label: list of labels (target_id, x1, y1, x2, y2)
    :param save_file: output file name
    :return:
    """
    height, width, _ = img_shape
    with open(save_file, 'w') as label_file:
        for label in img_label:
            target_id, x1, y1, x2, y2 = label
            label_box = (float(x1), float(x2), float(y1), float(y2))
            label_yolo = util.convert((width, height), label_box)
            label_file.write(
                str(target_id) + " " +
                " ".join([str(a) for a in label_yolo]) + '\n')
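For reference, the (width, height) plus (xmin, xmax, ymin, ymax) to normalized YOLO coordinates conversion that util.convert is assumed to perform here is commonly written as follows; this is a hypothetical sketch, not the project's helper:

def voc_box_to_yolo(size, box):
    # size: (image_width, image_height); box: (xmin, xmax, ymin, ymax)
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0 * dw  # normalized box center x
    y = (box[2] + box[3]) / 2.0 * dh  # normalized box center y
    w = (box[1] - box[0]) * dw        # normalized box width
    h = (box[3] - box[2]) * dh        # normalized box height
    return x, y, w, h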
Example #13
def start():
    piano_folder = "25-Piano-Soundfonts"
    piano_options = convert(os.listdir(piano_folder))
    pp.pprint(piano_options)
    selected_piano = None
    while selected_piano is None:
        piano_name = input("Enter Piano: ")
        selected_piano = piano_options.get(piano_name)

    SF2 = f"{piano_folder}/{selected_piano}"
    print("Left Hand")
    pp.pprint(qwerty_keys_to_standard.get('left'))
    print("Right Hand")
    pp.pprint(qwerty_keys_to_standard.get('right'))
    if not fluidsynth.init(SF2):
        print("Couldn't load soundfont", SF2)
        sys.exit(1)
Example #14
def main():
    client = bigquery.Client('vdslab-covid19')
    table = client.get_table('twitter.test2')
    api = get_api()
    max_id = None
    while True:
        print(max_id)
        tweets = api.search('(コロナ OR covid19 OR 武漢肺炎) min_retweets:10',
                            lang='ja',
                            locale='ja',
                            result_type='recent',
                            count=1000,
                            max_id=max_id)
        if len(tweets) == 0:
            break
        client.insert_rows(table, [convert(status._json) for status in tweets])
        max_id = tweets.max_id
        time.sleep(5)
Example #15
    def capture(self):
        if not self.valid:
            return self.empty

        # Read a frame.
        ret, frame = self.feed.read()

        if not ret:
            # Something went wrong with the capture.
            return self.empty

        # Resize the frame.
        frame = cv2.resize(frame, (CAM_WIDTH, CAM_HEIGHT))

        # Convert the frame to RGB.
        frame = util.convert(frame)

        return frame
    def __init__(self, row, col, terrain=0, base=-1, crystals=0):
        pygame.sprite.Sprite.__init__(self)

        self.row = int(row)
        self.col = int(col)
        self.cx, self.cy = util.convert(self.row, self.col)

        self.base = int(base)
        self.crystals = int(crystals)
        self.terrain = self.TERRAIN_TYPE[int(terrain)]
        self.crystalImage = pygame.image.load(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'assets/crystal.png'))
        if terrain == "3":
            self.image = pygame.image.load(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'assets/rock.png'))
        else:
            self.image = None

        self.edges = (
            (self.cx,  self.cy - self.HEIGHT),
            (self.cx + self.WIDTH, self.cy - self.HEIGHT / 2),
            (self.cx + self.WIDTH, self.cy + self.HEIGHT / 2),
            (self.cx,  self.cy + self.HEIGHT),
            (self.cx - self.WIDTH, self.cy + self.HEIGHT / 2),
            (self.cx - self.WIDTH, self.cy - self.HEIGHT / 2),
        )
Example #17
def main(file_name):

    # Instance the rsa class
    rsa = alg.rsa()
    keys_gen = rsa.generate_key_pair()
    session_key = get_random_bytes(16)  # Random user's key 128 bits

    # Convert file from utf-8 to bytes
    convert = util.convert(file_name)
    text_to_bytes = convert.to_bytes()

    # Encrypt and decrypt the session key
    key_encrypted = rsa.encrypt_key(session_key)
    key_decrypted = rsa.decrypt_key(key_encrypted)

    # Instance the aes_cbc class
    aes = alg.aes_cbc(key_decrypted, text_to_bytes)
    encrypt_message = aes.encrypt_message()
    iv = encrypt_message[1]  # Export the initialize vector

    # Decrypt the message into the file
    decrypt_message = aes.decrypt_message(iv)

    return encrypt_message[0], decrypt_message
Example #18
# print e2[0].find("div",class_="image")
for elem in e2:
    t = elem.find("div",class_="text")
    res_name = t.find("div",class_="text-cnt Restaurants").a.text


    res_cui = unescape(t.find("div",class_="text-cnt Restaurants").p.text)
    # str.decode("utf-8").replace(res_cui, "@")
    special = u"\u2022"
    res_cui = res_cui.replace(special,'@')
    res_cui = parseres_name(res_cui)

    stats = t.select(".text-stats")
    res_data["name"] = res_name
    res_data["cuisine"] = res_cui
    res_data["food"] = convert(t.select(".i-number.i-number-red")[0].text)
    res_data["decor"] = convert(t.select(".i-number")[0].text)
    res_data["service"] = convert(t.select(".i-number")[1].text)
    res_data["cost"] = convert(t.select(".i-number")[2].text)
    print res_data
    newd = res_data.copy()
    res_data_list.append(newd)

print res_data_list
print len(res_data_list)

writetofile(res_data_list)
Example #19
def spots():
    refresh()
    return convert(many(Spot))
def callback(ch, method, properties, body):
    id_, datum_ = convert(body)
    update_jubatus(datum_)
    print "update succeeded (ID: " + str(id_) + ")"
Example #21
# coding: utf-8
import util
import pandas as pd
import pickle



path = 'data/'

train = pd.read_csv(path + 'train.csv')
test = pd.read_csv(path + 'test.csv')

# convert attributes to wide tables
event_type = util.convert(pd.read_csv(path + 'event_type.csv'),
                          add_count=True, count_column='event_type_count')
# usecols - to control which columns to be parsed
log_feature = util.convert(pd.read_csv(path + 'log_feature.csv'),
                           fill='zero', add_count=True,
                           count_column='log_feature_count')
resource_type = util.convert(pd.read_csv(path + 'resource_type.csv'),
                             add_count=True, count_column='resource_type_count')
severity_type = util.convert(pd.read_csv(path + 'severity_type.csv'))
location = util.convert(pd.read_csv(path + 'location.csv'),
                        add_count=True, count_column='location_count')

# move id to the index, for merge purpose
event_type.set_index('id', inplace=True)
log_feature.set_index('id', inplace=True)
resource_type.set_index('id', inplace=True)
severity_type.set_index('id', inplace=True)
location.set_index('id', inplace=True)

train.set_index('id', inplace=True)
test.set_index('id', inplace=True)

temp = train.drop(['location','fault_severity'], axis=1)
parser = argparse.ArgumentParser()
parser.add_argument('config_file', help='Config file', type=str)
parser.add_argument(
    '--repeat', help='Whether or not to repeat the experiment', type=int,
    default=1)
parser.add_argument(
    '--plot_data', help='Whether or not to plot the data', action='store_true')
parser.add_argument(
    '--cv_config_file', help='Config file after cv', type=str, default="")
parser.add_argument(
    '--n_jobs', help='Number of jobs running in parallel', type=int, default=2)
args = parser.parse_args()

config_file = args.config_file
config = util.convert(json.loads(open(config_file).read()))
cv_config_file = args.cv_config_file
plot_data = args.plot_data
repeat = args.repeat
n_jobs = args.n_jobs

dataset_config = config["dataset"]
classifiers = config["classifiers"]
cross_validation = config["cross_validation"]
use_kfold = True
if "n_folds" in config:
  n_folds = config["n_folds"]
if "test_size" in config:
  use_kfold = False
  test_size = config["test_size"]
  cv_n_iter = config["cv_n_iter"]
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import preprocessing
import argparse
import util

args = util.parse_cmdline()
print(args)

if args.precision == 'double':
    type = 'f8'
else:
    type = 'f4'

# print("Decompressing the dataset...", end=" ")
print("Decompressing the dataset...")
images = util.convert(args.data, args.labels, args.N)
images = preprocessing.filter_classes(images, args.classes, type)

Y = preprocessing.get_binary_labels(images, args.classes)
X = preprocessing.get_data(images, type)
X_tr, X_ts, Y_tr, Y_ts = train_test_split(X,
                                          Y,
                                          test_size=(1 - args.train),
                                          random_state=4)

mean = X_tr.mean()
std = X_tr.std()
X_tr = (X_tr - mean) / std
X_ts = (X_ts - mean) / std

print(X_tr.mean(), X_tr.std())
        Path(__file__).resolve().parents[2] / "data_concelhos_incidencia.csv")
    PATH_TO_CSV_14DIAS = str(
        Path(__file__).resolve().parents[2] / "data_concelhos_14dias.csv")

    # Get list of municipalities
    concelhos_df = get_list_municipalities()

    # Get list of cases
    casos_df = get_list_cases_long()

    casos_df["confirmados_14"] = (casos_df["incidencia"] *
                                  casos_df["population"] / 100000.0)
    casos_df["confirmados_1"] = casos_df["confirmados_14"].div(14)

    cols = ["confirmados_14", "confirmados_1"]
    casos_df = convert(casos_df, cols, convert_to_int)
    cols = ["incidencia"]
    casos_df = convert(casos_df, cols, convert_to_float)
    cols = [x for x in casos_df.columns if x.startswith("densidade")]
    casos_df = convert(casos_df, cols, convert_to_float)
    cols = [x for x in casos_df.columns if x.startswith("population")]
    casos_df = convert(casos_df, cols, convert_to_int)

    cols = list(casos_df.columns)
    for i in ["data", "confirmados_14", "confirmados_1"]:
        cols.remove(i)
    cols.insert(cols.index("concelho") + 1, "confirmados_14")
    cols.insert(cols.index("concelho") + 2, "confirmados_1")
    cols.insert(0, "data")
    casos_df = casos_df[cols]
Example #25
            updated = updated.melt(id_vars=["data"],
                                   var_name="Concelho",
                                   value_name="Casos")
            updated = updated.merge(
                population[["Concelho", "2019"]],
                how="left",
                left_on="Concelho",
                right_on="Concelho",
            )
            updated.fillna(0, inplace=True)
            updated["ratio"] = round(
                updated["Casos"] * 100 * 1000 / updated["2019"], 1)
            updated = updated.pivot_table(values="ratio",
                                          index="data",
                                          columns="Concelho")

            updated = updated.reset_index(level=0)

        cols = [x for x in updated.columns if x != "data"]
        updated[cols] = updated[cols].diff(2)  # 14 days
        updated = updated[2:]
        func = convert_to_int if i == DATA_CONCELHOS_14DIAS_CSV else convert_to_float
        updated = convert(updated, cols, func)

        # sort by date
        updated = updated.sort_values("data")
        # convert back into dd-mm-yyyy
        updated["data"] = updated["data"].dt.strftime("%d-%m-%Y")

        updated.to_csv(i, index=False, line_terminator="\n")
Example #26
            transport._preferred_keys = [ hostkey.get_name() ]

            key = transport.get_remote_server_key()
            if (key.get_name() != hostkey.get_name() 
                                                or str(key) != str(hostkey)):
                log.error('Bad host key from server (%s).' % name)
                raise AuthenticationError('Bad host key from server (%s).'
                                                                    % name)
            log.info('Server host key verified (%s) for %s' % (key.get_name(), 
                                                                    name))

        privkey = cipher.decipher(tags['site'].get('privkey', 
                                 tags['site'].get('pkey', '')))
        password = cipher.decipher(tags['site'].get('password', ''))
        password_encoding = tags['site'].get('password_encoding', 'utf8')
        password = convert(password, password_encoding)

        authentified = False
        if privkey:
            privkey = util.get_dss_key_from_string(privkey)
            try:
                transport.auth_publickey(tags['site']['login'], privkey)
                authentified = True
            except AuthenticationException:
                log.warning('PKey for %s was not accepted' % name)

        if not authentified and password:
            try:
                transport.auth_password(tags['site']['login'], password)
                authentified = True
            except AuthenticationException:
Example #27
import json, os
import util

setting_path = os.path.join(os.path.dirname(__file__), 'settings.json')
with open(setting_path) as settingFile:
    settings = util.convert(json.load(settingFile))

extras = ['papertype', 'key', 'extra']

settings['lookup_fields'] = settings['bib_fields'] + extras + ['thing']
settings['fields'] = settings['bib_fields'] + extras


Example #28
def set_length(path, objct, dur):
    aud = objct(path)

    aud.info.length = convert(dur)

    aud.save()
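Here convert(dur) presumably turns a human-readable duration into a length in seconds before it is stored on the audio metadata; a hypothetical sketch of such a helper (not the project's actual code):

def duration_to_seconds(dur):
    # Hypothetical helper: accept a number or an "MM:SS" / "HH:MM:SS" string
    # and return the total duration in seconds.
    if isinstance(dur, (int, float)):
        return float(dur)
    seconds = 0.0
    for part in str(dur).split(':'):
        seconds = seconds * 60 + float(part)
    return seconds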
Example #29
            MathJax.Hub.Config({
                tex2jax: {
                inlineMath: [ ['$','$'],],
                processEscapes: true
                }
            });
            </script>
            {%renderer%}
        </footer>
    </body>
</html>
'''

f = open(os.path.join(filepath, "q2_particle_in_an_infinite_potential_box.md"),
         "r")
text_list = util.convert(f.read(), lateximg=True, addbutton=True, addtoc=True)
mds = []
print("Total {} blocks of Markdown".format(len(text_list)))
for t in text_list:
    tmp = dcc.Markdown(t, dangerously_allow_html=True)
    mds.append(tmp)

#####################################################################


def get_1dbox(n=5, L=10, num_me=1, all_levels=False):
    # Defining the wavefunction
    def psi(x, n, L):
        return np.sqrt(2.0 / L) * np.sin(float(n) * np.pi * x / L)

    def get_energy(n, L, m):
Example #30
        'arslvt': 3,
        'arsalentejo': 4,
        'arsalgarve': 5,
        'madeira': 6,
        'açores': 7,
        'outro': 8,
        'all': 9,
    }
    cols = sorted(cols, key=lambda x: ARS_ORDER[x.split('_')[-1]])
    data_regional = data_regional[cols]

    # concatenate everything into one wiiiiiide table
    data_wide = pd.concat([data_general, data_ages, data_regional], axis=1)

    # clean data - integers,
    #  and floats with 10 decimal places to prevent inconsistencies between platforms 0.3(3)
    cols = [
        x for x in data_wide.columns
        if not x.startswith("data") and not 'perc' in x
    ]
    data_wide = convert(data_wide, cols, convert_to_int)
    cols = [x for x in data_wide.columns if 'perc' in x]
    data_wide[cols] = data_wide[cols].apply(lambda x: round(x, 10))

    # recompute the date, just in case
    data_wide['data'] = data_wide.index
    data_wide['data'] = data_wide['data'].apply(
        lambda x: x.strftime('%d-%m-%Y'))

    data_wide.to_csv(PATH_TO_ROOT / 'vacinas_detalhe.csv', index=False)
Example #31
    def from_scryfall(cls, data):
        mapping = {'released_at': 'release_date'}
        data = util.convert(data, mapping)
        data['release_date'] = date.fromisoformat(data['release_date'])
        col_names = [c.name for c in Set.__table__.columns]
        return Set(**util.restriction(data, col_names))
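In this example, util.convert(data, mapping) appears to rename dictionary keys according to the mapping (here 'released_at' -> 'release_date'); a minimal hypothetical equivalent, not the project's actual helper:

def rename_keys(data, mapping):
    # Hypothetical stand-in for util.convert(data, mapping): return a copy of
    # data with each key renamed per the mapping; other keys are kept as-is.
    return {mapping.get(key, key): value for key, value in data.items()}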
Example #32
def garages():
    refresh()
    return convert(many(Garage))
Example #33
    casos_df = get_list_cases_long()

    # Merge list of cases with list of municipalities
    casos_df = concelhos_df.merge(casos_df, how="left", on="concelho")

    # Helper for pivot table
    casos_df.loc[casos_df.data.isna(), ["confirmados"]] = -1
    casos_df.loc[casos_df.data.isna(), ["data"]] = "24-03-2020"

    casos_df = casos_df.sort_values(by=["concelho"])

    # Convert long table to wide table
    casos_wide = pd.pivot_table(casos_df,
                                values="confirmados",
                                index="data",
                                columns="concelho")
    casos_wide = casos_wide.reset_index()

    casos_wide.data = pd.to_datetime(casos_wide.data, format="%d-%m-%Y")
    casos_wide = casos_wide.sort_values(by="data").reset_index(drop=True)
    casos_wide = casos_wide.replace(-1, np.nan)

    casos_wide = patch_concelhos1(casos_wide)
    casos_wide.data = casos_wide["data"].dt.strftime("%d-%m-%Y")
    casos_wide = patch_concelhos2(casos_wide)

    cols = [x for x in casos_wide.columns if not x.startswith("data")]
    casos_wide = convert(casos_wide, cols, convert_to_int)

    casos_wide.to_csv(PATH_TO_CSV, index=False, sep=",")
Example #35
def train_conv_net(datasets,
                   U,
                   idx_word_map,
                   img_w=300,
                   filter_hs=[3, 4, 5],
                   hidden_units=[100, 2],
                   dropout_rate=[0.5],
                   shuffle_batch=True,
                   n_epochs=25,
                   batch_size=50,
                   lr_decay=0.95,
                   conv_non_linear="relu",
                   activations=[Iden],
                   sqr_norm_lim=9,
                   non_static=True,
                   sen_dropout_rate=[0.0],
                   whether_train_sen=True):

    rng = np.random.RandomState(3435)
    img_h = datasets[0][0][0].shape[0] - 1
    filter_w = img_w
    feature_maps = hidden_units[0]
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))
    parameters = [("image shape", img_h, img_w),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("non_static", non_static), ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch),
                  ('sentence dropout rate', sen_dropout_rate)]
    print(parameters)

    #define model architecture
    index = T.lscalar()
    x = T.tensor3('x')
    y = T.ivector('y')
    sen_x = T.matrix('sen_x')
    mark = T.matrix('mark')
    sen_y = T.ivector('sen_y')
    Words = theano.shared(value=U, name="Words")
    zero_vec_tensor = T.vector()
    zero_vec = np.zeros(img_w)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[
                                   (Words,
                                    T.set_subtensor(Words[0, :],
                                                    zero_vec_tensor))
                               ],
                               allow_input_downcast=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], Words.shape[1]))
    sen_layer0_input = Words[T.cast(sen_x.flatten(), dtype='int32')].reshape(
        (sen_x.shape[0], 1, sen_x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    Doc_length = datasets[0][0].shape[0]
    sen_layer1_inputs = []
    for i in xrange(len(filter_hs)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng,
                                        input=layer0_input,
                                        image_shape=(None, 1, img_h, img_w),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
        sen_layer1_input = conv_layer.predict(sen_layer0_input,
                                              None).flatten(2)
        sen_layer1_inputs.append(sen_layer1_input)

    layer1_input = T.concatenate(layer1_inputs, 1)
    sen_layer1_input = T.concatenate(sen_layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(filter_hs)
    sen_hidden_units = [feature_maps * len(filter_hs), 3]
    shaped_mark = T.flatten(mark)
    sen_classifier1 = MLPDropout(rng,
                                 input=sen_layer1_input,
                                 layer_sizes=sen_hidden_units,
                                 activations=activations,
                                 dropout_rates=sen_dropout_rate)
    sen_cost = sen_classifier1.dropout_negative_log_likelihood(sen_y)
    sen_pos_prob = T.max(
        sen_classifier1.predict_p(layer1_input)[:, np.array([0, 2])], axis=1)
    prev_layer1_output, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(batch_size)],
        non_sequences=layer1_input * (sen_pos_prob.dimshuffle(0, 'x')) *
        (shaped_mark.dimshuffle(0, 'x')))
    layer1_output = T.sum(prev_layer1_output, axis=1)
    classifier = MLPDropout(rng,
                            input=layer1_output,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)

    #define parameters of the model and update functions using adadelta
    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]

    #add sentence level parameters
    sen_params = sen_classifier1.params
    for conv_layer in conv_layers:
        sen_params += conv_layer.params
    if non_static:
        sen_params += [Words]

    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6,
                                        sqr_norm_lim)
    sen_grad_updates = sgd_updates_adadelta(sen_params, sen_cost, lr_decay,
                                            1e-6, sqr_norm_lim)

    np.random.seed(3435)
    train_mask = np.zeros((datasets[0].shape[0], datasets[0].shape[1]),
                          dtype='float32')  # doc length * number of documents
    test_mask = np.zeros((datasets[2].shape[0], datasets[2].shape[1]),
                         dtype='float32')

    #set the mask
    for i in range(datasets[0].shape[0]):
        for j in range(datasets[0][i].shape[0]):
            if np.count_nonzero(datasets[0][i][j]) != 0:
                train_mask[i][j] = 1.0

    for i in range(datasets[2].shape[0]):
        for j in range(datasets[2][i].shape[0]):
            if np.count_nonzero(datasets[2][i][j]) != 0:
                test_mask[i][j] = 1.0

    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        permuted_index = np.random.permutation(range(datasets[0].shape[0]))
        permuted_index = np.append(permuted_index,
                                   permuted_index[:extra_data_num])
        new_data = datasets[0][permuted_index]
    else:
        permuted_index = np.random.permutation(range(datasets[0].shape[0]))
        new_data = datasets[0][permuted_index]

    n_batches = new_data.shape[0] / batch_size
    n_train_batches = int(np.round(n_batches * 0.9))

    #divide train set into train/val sets
    train_set_y = datasets[1][permuted_index]
    test_set_x, test_set_y = shared_dataset(
        (datasets[2][:, :, :-1], datasets[3]))
    test_set_mark = theano.shared(test_mask.astype(theano.config.floatX))

    train_mask = train_mask[permuted_index]
    train_set_mark = train_mask[:n_train_batches * batch_size]
    train_set_mark = theano.shared(train_set_mark.astype(theano.config.floatX))

    train_set_with_sen_label = new_data[:n_train_batches * batch_size]
    val_set_with_sen_label = new_data[n_train_batches * batch_size:]

    train_set = new_data[:n_train_batches * batch_size, :, :-1]
    train_set_label = train_set_y[:n_train_batches * batch_size]

    val_set = new_data[n_train_batches * batch_size:, :, :-1]
    val_set_label = train_set_y[n_train_batches * batch_size:]
    val_set_mark = train_mask[n_train_batches * batch_size:]
    val_set_mark = theano.shared(val_set_mark.astype(theano.config.floatX))

    train_set_x, train_set_y = shared_dataset((train_set, train_set_label))

    val_set_x, val_set_y = shared_dataset((val_set, val_set_label))

    n_val_batches = n_batches - n_train_batches
    val_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: val_set_x[index * batch_size:(index + 1) * batch_size],
            y: val_set_y[index * batch_size:(index + 1) * batch_size],
            mark: val_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    #compile theano functions to get train/val/test errors
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mark: train_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    train_model = theano.function(
        [index],
        cost,
        updates=grad_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            mark: train_set_mark[index * batch_size:(index + 1) * batch_size]
        },
        allow_input_downcast=True)

    test_pred_layers = []
    test_size = datasets[2].shape[0]
    test_batch_size = 1
    n_test_batches = int(math.ceil(test_size / float(test_batch_size)))
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0] * x.shape[1], 1, x.shape[2], Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input,
                                                test_batch_size * Doc_length)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_sen_prob = T.max(
        sen_classifier1.predict_p(test_layer1_input)[:, np.array([0, 2])],
        axis=1)
    test_sen_prob_to_sen, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(test_batch_size)],
        non_sequences=test_sen_prob)

    sorted_index = T.argsort(test_sen_prob_to_sen * shaped_mark, axis=-1)[:,
                                                                          -5:]
    sorted_sentence, updates = theano.scan(
        fn=lambda i, y: y[i, sorted_index[i], :],
        sequences=[T.arange(sorted_index.shape[0])],
        non_sequences=x)
    sorted_prob, updates = theano.scan(
        fn=lambda i, z: z[i, sorted_index[i]],
        sequences=[T.arange(sorted_index.shape[0])],
        non_sequences=test_sen_prob_to_sen)

    sorted_sentence_value = theano.function(
        [index],
        sorted_sentence,
        allow_input_downcast=True,
        givens={
            x:
            test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            mark:
            test_set_mark[index * test_batch_size:(index + 1) *
                          test_batch_size]
        })

    sorted_prob_val = theano.function(
        [index],
        sorted_prob,
        allow_input_downcast=True,
        givens={
            x:
            test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            mark:
            test_set_mark[index * test_batch_size:(index + 1) *
                          test_batch_size]
        })

    test_layer1_output, updates = theano.scan(
        fn=lambda i, x: x[i * Doc_length:i * Doc_length + Doc_length],
        sequences=[T.arange(test_batch_size)],
        non_sequences=test_layer1_input * (test_sen_prob.dimshuffle(0, 'x')) *
        (shaped_mark.dimshuffle(0, 'x')))
    test_layer1_output = T.sum(test_layer1_output, axis=1)
    test_y_pred = classifier.predict(test_layer1_output)
    test_error = T.mean(T.neq(test_y_pred, y))

    test_model_all = theano.function(
        [index],
        test_error,
        allow_input_downcast=True,
        givens={
            x:
            test_set_x[index * test_batch_size:(index + 1) * test_batch_size],
            y:
            test_set_y[index * test_batch_size:(index + 1) * test_batch_size],
            mark:
            test_set_mark[index * test_batch_size:(index + 1) *
                          test_batch_size],
        })

    print('... training')
    epoch = 0
    best_val_perf = 0
    val_perf = 0
    test_perf = 0
    cost_epoch = 0
    sen_batch_size = 50
    best_sen_param = []
    for p in sen_params:
        best_sen_param.append(theano.shared(p.get_value()))

    #first training on sentences
    best_sen_val = 0.0
    if whether_train_sen == True:
        print('pre-train on sentences')
        while (epoch < 20):
            sen_costs = []
            train_sen = train_set_with_sen_label
            train_sentences = util.doc_to_sen(train_sen)
            train_sentences = util.remove(train_sentences)
            train_sentences = util.downsample_three(train_sentences)
            print("positive sentences after sampling: " +
                  str(np.sum(train_sentences[:, -1] == 2)))
            print("negative sentences after sampling: " +
                  str(np.sum(train_sentences[:, -1] == 0)))
            print("neutral sentences after sampling: " +
                  str(np.sum(train_sentences[:, -1] == 1)))
            train_sentences = np.random.permutation(train_sentences)
            if train_sentences.shape[0] % sen_batch_size != 0:
                extra_data_num = sen_batch_size - train_sentences.shape[
                    0] % sen_batch_size
                extra_index = np.random.permutation(
                    range(train_sentences.shape[0]))[:extra_data_num]
                train_sentences = np.vstack(
                    (train_sentences, train_sentences[extra_index]))
            train_sen_x, train_sen_y = shared_dataset(
                (train_sentences[:, :-1], train_sentences[:, -1]))
            train_sen_model = theano.function(
                [index],
                sen_cost,
                updates=sen_grad_updates,
                givens={
                    sen_x:
                    train_sen_x[index * sen_batch_size:(index + 1) *
                                sen_batch_size],
                    sen_y:
                    train_sen_y[index * sen_batch_size:(index + 1) *
                                sen_batch_size]
                })

            n_train_sen_batches = train_sentences.shape[0] / sen_batch_size
            for minibatch_index_1 in np.random.permutation(
                    range(n_train_sen_batches)):
                cur_sen_cost = train_sen_model(minibatch_index_1)
                sen_costs.append(cur_sen_cost)
                set_zero(zero_vec)

            print("training sentence cost: " +
                  str(sum(sen_costs) / len(sen_costs)))
            val_sen = val_set_with_sen_label
            val_sentences = util.doc_to_sen(val_sen)
            val_sentences = util.remove(val_sentences)
            print("positive sentences in the validation set: " +
                  str(np.sum(val_sentences[:, -1] == 2)))
            print("negative sentences in the validation set: " +
                  str(np.sum(val_sentences[:, -1] == 0)))
            print("neutral sentences in the validation set: " +
                  str(np.sum(val_sentences[:, -1] == 1)))
            val_sen_x, val_sen_y = shared_dataset(
                (val_sentences[:, :-1], val_sentences[:, -1]))
            val_sen_model = theano.function([],
                                            sen_classifier1.errors(sen_y),
                                            givens={
                                                sen_x: val_sen_x,
                                                sen_y: val_sen_y
                                            })
            val_accuracy = 1 - val_sen_model()
            print("validation sentence accuracy: " + str(val_accuracy))
            if val_accuracy > best_sen_val:
                best_sen_val = val_accuracy
                for i, p in enumerate(best_sen_param):
                    p.set_value(sen_params[i].get_value())
            epoch = epoch + 1
        for i, sp in enumerate(sen_params):
            sp.set_value(best_sen_param[i].get_value())

    #train on documents
    epoch = 0
    while (epoch < n_epochs):
        start_time = time.time()
        epoch = epoch + 1
        if shuffle_batch:
            for minibatch_index in np.random.permutation(
                    range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
        train_losses = [test_model(i) for i in xrange(n_train_batches)]
        train_perf = 1 - np.mean(train_losses)
        val_losses = [val_model(i) for i in xrange(n_val_batches)]
        val_perf = 1 - np.mean(val_losses)
        print(
            'epoch: %i, training time: %.2f secs, train perf: %.2f %%, val perf: %.2f %%'
            % (epoch, time.time() - start_time, train_perf * 100.,
               val_perf * 100.))
        if val_perf >= best_val_perf:
            best_val_perf = val_perf
            test_loss = [test_model_all(i) for i in xrange(n_test_batches)]
            test_perf = 1 - np.sum(test_loss) / float(test_size)
            print("best test performance so far: " + str(test_perf))
    test_loss = [test_model_all(i) for i in xrange(n_test_batches)]
    new_test_loss = []
    for i in test_loss:
        new_test_loss.append(np.asscalar(i))
    test_loss = new_test_loss
    correct_index = np.where(np.array(test_loss) == 0)[0]
    count_pos = 0
    test_labels = np.array(datasets[3])

    # sample two correctly predicted positive documents and two correctly predicted negative documents
    # for each document, generate top five rationales with highest probabilities
    print("negative estimated rationales: ")
    print(len(idx_word_map))
    for c in correct_index:
        if test_labels[c] == 1: continue
        print(util.convert(sorted_sentence_value(c)[0], idx_word_map))
        print(sorted_prob_val(c))
        count_pos += 1
        if count_pos == 2:
            break

    count_neg = 0
    print("positive estimated rationales: ")
    for c in correct_index:
        if test_labels[c] == 0: continue
        print(util.convert(sorted_sentence_value(c)[0], idx_word_map))
        print(sorted_prob_val(c))
        count_neg += 1
        if count_neg == 2:
            break
    return test_perf
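At the end of training above, util.convert(sorted_sentence_value(c)[0], idx_word_map) turns rows of word indices back into readable text; a hypothetical sketch of the per-sentence index-to-word mapping assumed here (treating index 0 as padding):

def indices_to_words(indices, idx_word_map):
    # Hypothetical stand-in: map each word index back to its token via
    # idx_word_map and join into a sentence, skipping the padding index 0.
    return ' '.join(idx_word_map[int(i)] for i in indices if int(i) != 0)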
Example #36
    #print(key,key1,key2)
    key=util.urlencode(key,cfg[4])
    key=key.replace('%5C','%')
    print(key)

    scfg=[]
    n,t=lg.getsearchparam(key)
    for i in range(n):
        scfg.append(t[i+1])
        print(t[i+1])

    # navigate
    res = request.urlopen(cfg[5])
    print(res.status, res.reason)
    s = res.read()
    s = util.convert(s,cfg[1])
    with open(file,'wb') as f:
        f.write(s)
    #gethomecomics
    comics = []
    n,t=lg.gethomecomics(file)
    for i in range(n):
        comics.append(t[i+1])
        #print(i+1,t[i+1])
    print(comics[0])
    #cats
    cats = []
    comics = []
    n,t=lg.getcats(file)
    for i in range(n):
        cats.append(t[i+1])
Example #37
def details(reservation: Reservation):
    spot = one(Spot, reservation.spot_id)
    garage_dict = convert(one(Garage, spot.garage_id))
    garage_dict['spot'] = convert(spot)
    garage_dict.pop('spots')
    return garage_dict
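As in the spots(), garages(), and reservations() examples above, convert here appears to serialize an ORM model instance into a plain dict; a hypothetical SQLAlchemy-style sketch, not the project's actual helper:

def model_to_dict(instance):
    # Hypothetical stand-in for convert(model): map every table column of a
    # SQLAlchemy model instance to an entry in a plain dict.
    return {column.name: getattr(instance, column.name)
            for column in instance.__table__.columns}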
Example #38
    def tick(self):
        # Read from the camera.
        self.camera_frame = self.camera.get()
        if self.live_segment:
            if self.live_segment_ready:
                self.live_segment_ready = False
                self.model_process.submit(COMMAND_SEGMENT,
                                          (EVENT_CAMERA_SEGMENT, self.camera_frame))
        else:
            self.camera_segmented = None

        # Display the current camera frame.
        camera_combined = self.camera_frame
        if self.camera_segmented is not None:
            camera_combined = self.camera_frame // 3 + self.camera_segmented
        util.display_image(CAM_WINDOW, camera_combined)

        # Capture a pressed key.
        self.key = cv2.waitKey(1) & 0xff

        # Toggle live segmenting if the live segmenting key is pressed.
        if self.key_pressed(KEY_LIVE):
            self.live_segment = not self.live_segment

        # Capture a frame if the capture key is pressed.
        if self.key_pressed(KEY_CAPTURE):
            self.capture(self.camera_frame)

        # Open a file if the open key is pressed.
        if self.key_pressed(KEY_OPEN):
            path = input('path> ')
            try:
                image = cv2.imread(path)
                image = util.convert(image)
                image = cv2.resize(image, (CAM_WIDTH, CAM_HEIGHT))
                self.capture(image)
                print('Image loaded')
            except:
                print('Invalid path')

        # Export the segmented image if the export key is pressed.
        if self.key_pressed(KEY_EXPORT):
            if self.canvas is not None:
                path = input('path> ')
                try:
                    cv2.imwrite(path, util.convert(self.canvas.get_combined()))
                    print('Image saved')
                except:
                    print('Invalid path')

        # Fill the canvas if the fill key is pressed.
        if self.key_pressed(KEY_FILL):
            if self.canvas is not None:
                self.canvas.fill()

        # Process the segment map if the process key is pressed.
        if self.key_pressed(KEY_PROCESS):
            if self.canvas is not None:
                self.process(self.canvas.get_map())

        # Save the result if the save key is pressed.
        if self.key_pressed(KEY_SAVE):
            if self.im_processed is not None:
                path = input('path> ')
                try:
                    cv2.imwrite(path, util.convert(self.im_processed))
                    print('Image saved')
                except:
                    print('Invalid path')

        # Segment the result if the test key is pressed.
        if self.key_pressed(KEY_TEST):
            if self.im_processed is not None:
                self.capture(self.im_processed)

        # Quit if the quit key is pressed.
        if self.key_pressed(KEY_QUIT):
            self.camera.stop()
            self.model_process.stop()
            return False

        # Tick the model process.
        self.model_process.tick()

        return True
def callback(ch, method, properties, body):
    id_, datum_ = convert(body)
    # id_ not used
    score = analyze_jubatus(datum_)
    print score
Example #40
def get_data():
    path = "data/"

    train = pd.read_csv(path + "train.csv")
    test = pd.read_csv(path + "test.csv")

    # convert attributes to wide tables
    event_type = util.convert(pd.read_csv(path + "event_type.csv"), add_count=True, count_column="event_type_count")
    # usecols - to control which columns to be parsed
    log_feature = util.convert(
        pd.read_csv(path + "log_feature.csv"), fill="zero", add_count=True, count_column="log_feature_count"
    )
    resource_type = util.convert(
        pd.read_csv(path + "resource_type.csv"), add_count=True, count_column="resource_type_count"
    )
    severity_type = util.convert(pd.read_csv(path + "severity_type.csv"))
    location = util.convert(pd.read_csv(path + "location.csv"), add_count=True, count_column="location_count")

    # move id to the index, for merge purpose
    event_type.set_index("id", inplace=True)
    log_feature.set_index("id", inplace=True)
    resource_type.set_index("id", inplace=True)
    severity_type.set_index("id", inplace=True)
    location.set_index("id", inplace=True)

    train.set_index("id", inplace=True)
    test.set_index("id", inplace=True)

    temp = train.drop(["location", "fault_severity"], axis=1)

    # In[11]:

    # merge with training dataset
    id_event_type = pd.merge(temp, event_type, left_index=True, right_index=True, how="inner")
    id_log_feature = pd.merge(temp, log_feature, left_index=True, right_index=True, how="inner")
    id_resource_type = pd.merge(temp, resource_type, left_index=True, right_index=True, how="inner")
    id_severity_type = pd.merge(temp, severity_type, left_index=True, right_index=True, how="inner")
    id_location = pd.merge(temp, location, left_index=True, right_index=True, how="inner")

    train = train.drop("location", axis=1)
    df_train = pd.concat(
        [train, id_event_type, id_log_feature, id_resource_type, id_severity_type, id_location], axis=1
    )

    # In[14]:

    # do the similar for test dataset
    temp = test.drop("location", axis=1)

    t_id_event_type = pd.merge(temp, event_type, left_index=True, right_index=True, how="inner")
    t_id_log_feature = pd.merge(temp, log_feature, left_index=True, right_index=True, how="inner")
    t_id_resource_type = pd.merge(temp, resource_type, left_index=True, right_index=True, how="inner")
    t_id_severity_type = pd.merge(temp, severity_type, left_index=True, right_index=True, how="inner")
    t_id_location = pd.merge(temp, location, left_index=True, right_index=True, how="inner")

    df_test = pd.concat(
        [temp, t_id_event_type, t_id_log_feature, t_id_resource_type, t_id_severity_type, t_id_location], axis=1
    )

    df_train = df_train.reset_index()
    df_test = df_test.reset_index()

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 1]

    X_test = df_test.iloc[:, 1:]
    X_ids = df_test.iloc[:, 0]

    output = {}
    output["X_train"] = X_train
    output["y_train"] = y_train
    output["X_test"] = X_test
    output["X_ids"] = X_ids

    return output
Example #41
               critic,
               t.optim.Adam,
               nn.MSELoss(reduction='sum'),
               actor_learning_rate=1e-5,
               critic_learning_rate=1e-4)

    episode, step, reward_fulfilled = 0, 0, 0
    smoothed_total_reward = 0

    while episode < max_episodes:
        episode += 1
        total_reward = 0
        terminal = False
        step = 0
        hidden = t.zeros([1, 1, 256])
        state = convert(env.reset())

        tmp_observations = []
        while not terminal:
            step += 1
            with t.no_grad():
                old_state = state
                # agent model inference
                old_hidden = hidden
                action, _, _, hidden = rppo.act({
                    "mem": state,
                    "hidden": hidden
                })
                state, reward, terminal, _ = env.step(action.item())
                state = convert(state)
                total_reward += reward
Example #42
        if not entries:
            return None
        if len(entries) == 1:
            entries[0]['extra'] = None
            return [entries[0]]
        return entries
    
if __name__ == '__main__':
    con = Controller()
    keys = ['wanght2008', 'guptat2007', 
        'lencckn2010', 'guptat2007-1', 'guptat2007-11']

    for key in keys:
        entries = con.get(key)

        if not entries:
            print 'WARNING: no entries for %s--------------------' % key
            continue
        elif len(entries) > 1:
            print 'WARNING: multiple entries for %s--------------' % key
        else:
            print '-------------------------------------------------------'

        for x in entries:        
            print util.convert(bib_string(x))