Example #1
0
 def make_new(string_key, hash_method="sha256", text_encoding="utf-8"):
     """Create a HashBasedCryptoData for a brand-new key.

     Derives two independently salted hashes from ``string_key``: one used
     as the encryption key material and one stored for later verification
     of the key.

     Args:
         string_key: The secret key string to hash.
         hash_method: Hash algorithm name passed to ``utils.hash_string``.
         text_encoding: Encoding used when hashing ``string_key``.

     Returns:
         A populated ``HashBasedCryptoData`` instance with no IV yet and
         ``data_length`` of -1 (sentinel: nothing encrypted so far).
     """
     # BUG FIX: the original hard-coded text_encoding="utf-8" in both
     # hash_string calls, silently ignoring the caller-supplied
     # ``text_encoding`` parameter.
     encryption_hash, encryption_hash_salt = utils.hash_string(
         string_key, hash_method=hash_method, text_encoding=text_encoding)
     storage_hash, storage_hash_salt = utils.hash_string(
         string_key, hash_method=hash_method, text_encoding=text_encoding)
     crypto_data = HashBasedCryptoData(from_code=True)
     # Direct attribute assignments instead of the original fragile
     # locals()/setattr loop (which breaks under renaming/refactoring).
     crypto_data.encryption_hash = encryption_hash
     crypto_data.encryption_hash_salt = encryption_hash_salt
     crypto_data.storage_hash = storage_hash
     crypto_data.storage_hash_salt = storage_hash_salt
     crypto_data.iv = None  # generated later, at encryption time
     crypto_data.data_length = -1  # sentinel: no data processed yet
     crypto_data.iv_is_from_deserializer = False
     return crypto_data
Example #2
0
 async def get(self, _, uid, rhash, answer):
     """Handle an absence-confirmation link clicked from an e-mail.

     ``uid`` identifies the user, ``rhash`` must equal the hash derived
     from the user's name/id/email (proof the link was not forged) and
     ``answer`` is expected to be 'yes' or 'no'.
     """
     try:
         user = await Users.get_by_id(int(uid))
         uhash = hash_string(user.name + str(user.id) + user.email)
         if not user.accepted:
             logging.error('{} was trying to hack us'.format(user.email))
             return json({'msg': 'Nice try! But nope.'})
         if user.confirmation != 'noans':
             # The user already answered; the decision is final.
             return json({
                 'msg':
                 'Sorry, it is not possible to change your mind now'
             })
         if uhash == rhash:
             if answer == 'yes':
                 user.confirmation = 'ack'
                 await user.update()
                 return json({'msg': 'Widzimy się w Sobotę 23.09.2017!'})
             elif answer == 'no':
                 user.confirmation = 'rej_user'
                 await user.update()
                 return json({'msg': 'Szkoda, że się już nie zobaczymy'})
             # BUG FIX: the original fell through and returned None for
             # any other ``answer`` value; respond explicitly instead.
             return json({'msg': 'wrong data'})
         return json({'msg': 'wrong hash'})
     except Exception:
         # Narrowed from a bare ``except:`` so BaseException subclasses
         # (SystemExit, KeyboardInterrupt, task cancellation) propagate.
         logging.exception('AbsenceConfirmation')
         return json({'msg': 'wrong data'})
Example #3
0
 async def post(self):
     """Authenticate a user by e-mail and password.

     Expects ``self.req.json`` with ``email`` and ``password``. On success
     the user is cached in the module-level ``_users`` map under a session
     UUID obtained from the user object, and a JSON profile (including
     that UUID) is returned.

     Responses: 404 when the user is missing or inactive,
     ``self.user_error`` on a bad password or lookup failure,
     500 on any unexpected error.
     """
     global _users
     try:
         req = self.req.json
         user = await Users.get_first('email', req.get('email', ''))
         if not user:
             return json({'msg': 'User not found'}, status=404)
         if not user.active:
             return json({'msg': 'User not active'}, status=404)
         # Stored password is a hash of the plaintext; 'x' is a throwaway
         # default that can never match a real stored hash.
         if hash_string(req.get('password', 'x')) == user.password:
             session_uuid = await user.get_session_uuid()
             # Cache the authenticated user for later session lookups.
             _users[session_uuid] = user
             return json({
                 'success': True,
                 'admin': user.admin,
                 'mentor': user.mentor,
                 'name': user.name,
                 'email': user.email,
                 'surname': user.surname,
                 'lang': user.lang,
                 'organiser': user.organiser,
                 'id': user.id,
                 'session_uuid': session_uuid,
                 'confirmation': user.confirmation,
                 'gdpr': user.gdpr,
             })
         else:
             return self.user_error
     except DoesNotExist:
         return self.user_error
     except:
         # NOTE(review): bare except also swallows BaseException
         # subclasses; consider narrowing to Exception.
         logging.exception('err authentication.post')
     return json({'msg': 'internal error'}, status=500)
Example #4
0
 async def post(self, request, current_user):
     """Change the current user's password.

     Expects JSON with ``password`` (the old one), ``new_password`` and
     ``new_password_2`` (confirmation). The old password must hash to the
     stored value and the two new values must be identical.
     """
     try:
         req = request.json
         # Stored password is a hash; 'x' default can never match.
         if hash_string(req.get('password', 'x')) == current_user.password:
             if req['new_password'] == req['new_password_2']:
                 await current_user.set_password(req['new_password'])
                 await current_user.update()
                 return json({
                     "success": True,
                     "msg": "You have Successfully changed password"
                 })
             return json({
                 "success": False,
                 "msg": "You provided different new passwords"
             })
         return json({
             "success": False,
             "msg": "You provided wrong old password"
         })
     except:
         # NOTE(review): bare except; a missing 'new_password' key lands
         # here via KeyError. Consider narrowing to Exception.
         logging.exception('authentication.post')
     return json({
         'msg': 'internal error sorry please let us now',
         "success": False
     })
Example #5
0
 async def post(self, request, current_user):
     """Change the current user's password, with strength validation.

     JSON body: ``password`` (old), ``new_password``, ``new_password_2``.
     The new password must pass ``validate_password`` first, then the old
     password must hash to the stored value and both new values must match.
     """
     try:
         req = request.json
         # Reject weak/invalid new passwords before any other check.
         validation_outcome = await current_user.validate_password(
             req['new_password'])
         if not validation_outcome['success']:
             return json(validation_outcome)
         # Stored password is a hash; 'x' default can never match.
         if hash_string(req.get('password', 'x')) == current_user.password:
             if req['new_password'] == req['new_password_2']:
                 await current_user.set_password(req['new_password'])
                 await current_user.update()
                 return json({
                     "success":
                     True,
                     "msg":
                     "You have successfully changed your password"
                 })
             return json({
                 "success": False,
                 "msg": "You provided different new password"
             })
         return json({
             "success": False,
             "msg": "You provided wrong old password"
         })
     except:
         # NOTE(review): bare except; missing JSON keys land here as
         # KeyError. Consider narrowing to Exception.
         logging.exception('authentication.post')
     return json({
         'msg': 'Sorry, internal error. Please let us now!',
         "success": False
     })
Example #6
0
def spoof_cert(server_cert, ca_cert, key):  # x509 objects, not strings
    """Re-sign ``server_cert`` with our CA so clients will trust it.

    The server certificate's issuer is replaced by the CA's subject, its
    public key by the CA's, and it is then signed with the CA's private
    ``key``. The forged certificate is written under ``certificates/``.

    Args:
        server_cert: The original server certificate (pyOpenSSL X509).
        ca_cert: Our CA certificate (pyOpenSSL X509).
        key: The CA private key used for signing.

    Returns:
        The filename (hash of the certificate's CN) inside ``certificates/``.
    """
    CN = server_cert.get_subject().commonName
    # Filename is the hashed CN: filesystem-safe and stable per host.
    CERTFILE = hash_string(CN)
    cert_dir = "certificates/"

    # Make the spoofed certificate look like it was issued by our CA.
    server_cert.set_issuer(ca_cert.get_subject())
    server_cert.set_pubkey(ca_cert.get_pubkey())

    # Sign the certificate using the CA's private key.
    server_cert.sign(key, 'sha256')

    # BUG FIX: the original leaked the file handle (open().write() with no
    # close); a context manager guarantees the data is flushed and closed.
    # (Also removed the unused ``temp`` path variable.)
    with open(join(cert_dir, CERTFILE), "wb") as cert_file:
        cert_file.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, server_cert))

    return CERTFILE
Example #7
0
 async def post(self, request):
     """Log a user in by e-mail and password.

     On success a fresh session id (UUID4 without dashes) is generated,
     ``last_login`` is stamped, the user is cached in the module-level
     ``_users`` map and the user's profile (including the session UUID)
     is returned.

     Responses: 404 when the user is missing or inactive,
     ``self.user_error`` on a bad password or lookup failure,
     500 on any unexpected error.
     """
     global _users
     try:
         req = request.json
         user = await Users.get_first('email', req.get('email', ''))
         if not user:
             return json({'msg': 'User not found'}, status=404)
         if not user.active:
             return json({'msg': 'User not active'}, status=404)
         # Stored password is a hash; 'x' default can never match.
         if hash_string(req.get('password', 'x')) == user.password:
             # New session id on every login invalidates older sessions.
             user.session_uuid = str(uuid4()).replace('-', '')
             user.last_login = datetime.utcnow()
             await user.update()
             _users[user.session_uuid] = user
             return json({
                 'success': True,
                 'admin': user.admin,
                 'mentor': user.mentor,
                 'name': user.name,
                 'email': user.email,
                 'surname': user.surname,
                 'lang': user.lang,
                 'organiser': user.organiser,
                 'id': user.id,
                 'session_uuid': user.session_uuid,
                 'confirmation': user.confirmation
             })
         else:
             return self.user_error
     except DoesNotExist:
         return self.user_error
     except:
         # NOTE(review): bare except; consider narrowing to Exception.
         logging.exception('err authentication.post')
     return json({'msg': 'internal error'}, status=500)
Example #8
0
    def store_alerts(self, alert):
        """Persist *alert* to the datastore and memcache.

        The storage key is the hash of the alert's mash expression
        concatenated with its activation timestamp; the key is also
        written back onto the alert dict under ``"hash_key"``.
        """
        key_source = alert["mash_expression"] + alert["active_since"]
        key = utils.hash_string(key_source)
        alert["hash_key"] = key
        TSAlertsJSON(id=key, json=alert, active_until=None).put()
        memcache.set(key, alert)
Example #9
0
 async def post(self, request, current_user):
     """Send workshop e-mails to a queried set of users.

     ``request.json`` supplies the recipient query (``recipients`` kwargs)
     and the template (``subject``/``text``); ``email_type`` selects the
     mode: custom broadcast, too-late rejections, per-user absence links,
     or a plain broadcast to all matched users.
     """
     req = request.json
     link = 'https://{}/api/workshopabsence/'.format(request.host)
     if req['email_type'] == 'EmailCustom':
         users = await Users.get_by_many_field_value(**req['recipients'])
         subject = req['subject']
         text = req['text'].format()
         await send_email(
             recipients=[u.email for u in users],
             text=text,
             subject=subject
         )
     elif req['email_type'] == 'EmailTooLate':
         users = await Users.get_by_many_field_value(**req['recipients'])
         for user in users:
             # Mark the user as rejected for answering too late.
             user.confirmation = 'rej_time'
             await user.update()
             email_data = {
                 "name": user.name
             }
             subject = req['subject']
             text = req['text'].format(**email_data)
             await send_email(
                 recipients=[user.email],
                 text=text,
                 subject=subject
             )
             await asyncio.sleep(0.05)  # throttle sends
     elif req['email_type'] == "per_user":
         users = await Users.get_by_many_field_value(**req['recipients'])
         for user in users:
             # Per-user hash authenticates the yes/no absence links.
             uhash = hash_string(user.name + str(user.id) + user.email)
             email_data = {
                 "link_yes": link + str(user.id) + '/' + uhash + '/' + 'yes',
                 "link_no": link + str(user.id) + '/' + uhash + '/' + 'no',
                 "name": user.name,
                 "what_can_you_bring": user.what_can_you_bring
             }
             subject = req['subject']
             # BUG FIX: was ``.format(email_data)``, which raises KeyError
             # for {name}-style placeholders; expand the dict as keyword
             # arguments like the EmailTooLate branch does.
             text = req['text'].format(**email_data)
             # BUG FIX: ``recipients`` must be a list, as in every other
             # send_email call in this handler (was a bare string).
             await send_email(
                 recipients=[user.email],
                 text=text,
                 subject=subject
             )
             await asyncio.sleep(0.05)  # throttle sends, as above
     else:
         users = await Users.get_by_many_field_value(**req['recipients'])
         recip = [user.email for user in users]
         subject = req['subject']
         text = req['text']
         await send_email(
             recipients=recip,
             text=text,
             subject=subject
         )
     return json({'success': True, 'count': len(users)})
Example #10
0
    def enqueue_song(self, song_hash):
        """Replica-side ENQUEUE RPC handler.

        Applies the master's enqueue of ``song_hash`` to the local playlist
        and compares the resulting state (playlist hash + current song)
        against the master's; on any divergence, or if the song file is
        missing locally, the replica enters recovery mode and answers with
        a failover response.
        """
        # Heartbeat bookkeeping: record when the master last contacted us.
        self._last_hb_ts = time.time() * MICROSECONDS
        content = utils.unserialize_response(request.get_data())
        command_epoch = content['command_epoch']
        master_post_hash = content['hashed_post_playlist']
        master_current_song = content['current_song']
        failover_resp = utils.format_rpc_response(False, ENQUEUE, {}, \
                                             msg='Replica in recovery mode', \
                                             command_epoch=command_epoch)
        if self._in_recovery:
            return utils.serialize_response(failover_resp)
        print "In Enqueue"
        print "Enqueue: " + str(self._current_song)

        # Duplicate-RPC detection: if our pre-state already matches the
        # master's post-state, the command was applied before.
        replica_pre_hash = utils.hash_string(pickle.dumps(
            self._playlist_queue))
        if replica_pre_hash == master_post_hash and self._current_song == master_current_song:
            print "Already Performed Operation in Enqueue"
            repeat_resp = utils.format_rpc_response(False, ENQUEUE, {}, \
                                                 msg='Already performed operation', \
                                                 command_epoch=command_epoch)
            return utils.serialize_response(repeat_resp)

        # Do enqueue, check for failover mode
        song_not_exist = not os.path.exists(utils.get_music_path(song_hash))
        self._playlist_queue.append(song_hash)
        replica_post_hash = utils.hash_string(
            pickle.dumps(self._playlist_queue))
        inconsistent_queue = master_post_hash != replica_post_hash or \
                             master_current_song != self._current_song
        print "queue hashes match: " + str(
            master_post_hash == replica_post_hash)
        print "current song matches: " + str(
            master_current_song) + " == " + str(self._current_song)
        # Missing song file or divergent state both force recovery.
        replica_failover = song_not_exist or inconsistent_queue
        if replica_failover:
            self._in_recovery = True
            return utils.serialize_response(failover_resp)
        master_time = content['time']
        self._master_timestamp = master_time
        resp = utils.format_rpc_response(True, ENQUEUE, {'enqueued': True}, \
                                         command_epoch=command_epoch)
        print str(resp)
        return utils.serialize_response(resp)
Example #11
0
 def check_string_key(self,
                      string_key,
                      hash_method="sha256",
                      text_encoding="utf-8"):
     """Verify that ``string_key`` reproduces the stored storage hash.

     Re-hashes the candidate key with the stored salt and compares the
     result against ``self.storage_hash``.

     Raises:
         InvalidKeyError: if the key does not match the stored hash.
     """
     candidate_hash, _ = utils.hash_string(string_key,
                                           salt=self.storage_hash_salt,
                                           hash_method=hash_method,
                                           text_encoding=text_encoding)
     if candidate_hash != self.storage_hash:
         raise InvalidKeyError(string_key)
Example #12
0
    def dequeue_song(self):
        """Replica-side DEQUEUE RPC handler.

        Pops the next song off the local playlist to mirror the master's
        dequeue, then checks that the resulting state (playlist hash and
        current song) matches the master's; on divergence the replica
        enters recovery mode and returns a failover response.
        """
        # Heartbeat bookkeeping: record when the master last contacted us.
        self._last_hb_ts = time.time() * MICROSECONDS
        content = utils.unserialize_response(request.get_data())
        command_epoch = content['command_epoch']
        master_post_hash = content['hashed_post_playlist']
        master_current_song = content['current_song']
        failover_resp = utils.format_rpc_response(False, DEQUEUE, {}, \
                                             msg='Replica in recovery mode', \
                                             command_epoch=command_epoch)
        if self._in_recovery:
            return utils.serialize_response(failover_resp)
        print "In Dequeue"

        # Duplicate-RPC detection: if our pre-state already matches the
        # master's post-state, the command was applied before.
        replica_pre_hash = utils.hash_string(pickle.dumps(
            self._playlist_queue))
        if replica_pre_hash == master_post_hash and self._current_song == master_current_song and self._current_song != None:
            repeat_resp = utils.format_rpc_response(False, DEQUEUE, {}, \
                                                 msg='Already performed operation', \
                                                 command_epoch=command_epoch)
            return utils.serialize_response(repeat_resp)

        # Check for length 0 queue
        if len(self._playlist_queue) == 0:
            self._current_song = None
        else:
            self._current_song = self._playlist_queue.popleft()
        replica_post_hash = utils.hash_string(
            pickle.dumps(self._playlist_queue))

        # Any divergence from the master's state forces recovery mode.
        if (replica_post_hash != master_post_hash
                or self._current_song != master_current_song):
            self._in_recovery = True
            return utils.serialize_response(failover_resp)
        master_time = content['time']
        self._master_timestamp = master_time
        resp = utils.format_rpc_response(True, DEQUEUE, {}, \
                                         msg='Successfully dequeued', \
                                         command_epoch=command_epoch)
        print "end of dequeue: current song for replica: " + str(
            self._current_song)
        return utils.serialize_response(resp)
Example #13
0
    def forward(self, return_status=True, play=True):
        """Skip to the next song (master side).

        Pops the next song from the playlist (or stops playback when the
        queue is empty), persists the playlist state to disk, replicates
        the dequeue to all replicas, and optionally starts playback and
        reports success on the client status queue.
        """
        # NOTE(review): 'song' is assigned but never used.
        song = None
        success_response = utils.format_client_response(\
                                          True, FORWARD, {}, \
                                          client_req_id=self._client_req_id)
        # No song in future and no song currently playing, nothing to do.
        if len(self._playlist_queue) == 0 and self._current_song == None:
            if return_status:
                self._status_queue.put(success_response)
            return

        # After a forward command we are always at the start of a song
        self._current_offset = 0
        # No songs to play anymore
        if len(self._playlist_queue) == 0:
            print "forward: no songs in playlist"
            self._current_song = None
            with open(PLAYLIST_STATE_FILE, 'w') as f:
                data = utils.format_playlist_state(self._playlist_queue,
                                                   self._current_song)
                f.write(data)
        # Pop out a song to play
        else:
            print "forward: popping song"
            self._current_song = self._playlist_queue.popleft()
            with open(PLAYLIST_STATE_FILE, 'w') as f:
                data = utils.format_playlist_state(self._playlist_queue,
                                                   self._current_song)
                f.write(data)
        hashed_post_playlist = utils.hash_string(
            pickle.dumps(self._playlist_queue))

        # Synchronizes dequeue operation across all replicas (for master recovery)
        rpc_data = {'hashed_post_playlist': hashed_post_playlist, \
                    'current_song' : self._current_song, \
                    'time': time.time() }
        # Try indefinitely until we get at least f+1 responses
        # Guaranteed RPC won't add to queue since new command_epoch prevents
        # Holding mutexes just in case
        self.exponential_backoff(rpc_data, DEQUEUE, \
                                 DEQUEUE_URL, \
                                 REPLICA_ACK_TIMEOUT)

        # Start playing the next song
        # (if current_song == None then will just stop playing music)
        if play:
            self.play(False)
        if return_status:
            self._status_queue.put(success_response)
Example #14
0
def enqueue_song(song_path):
    """Hash a local song file and ask the master to enqueue it.

    Loads/uploads the song first, then issues an HTTP request to the
    master's ENQUEUE endpoint keyed by the content hash and prints the
    outcome.
    """
    load_song(song_path)
    assert os.path.exists(song_path)
    # BUG FIX: open in binary mode -- the file is audio data, and text
    # mode would alter the bytes (and thus the content hash) on platforms
    # that translate line endings.
    with open(song_path, 'rb') as f:
        song_bytes = f.read()
        song_hash = utils.hash_string(song_bytes)
    url = get_url(ENQUEUE) + "/" + song_hash
    try:
        r = urllib2.urlopen(url)
    except Exception:
        print "Error in Enqueue Song"
    else:
        master_response = utils.unserialize_response(r.read())
        if master_response['success'] == True:
            print song_path + ' has been enqueued'
        else:
            print song_path + ' cannot be enqueued'
        print master_response['client_req_id']
Example #15
0
def enqueue_song(song_path):
    """Hash a local song file and ask the master to enqueue it.

    Loads/uploads the song first, then issues an HTTP request to the
    master's ENQUEUE endpoint keyed by the content hash and prints the
    outcome.
    """
    load_song(song_path)
    assert os.path.exists(song_path)
    # BUG FIX: open in binary mode -- the file is audio data, and text
    # mode would alter the bytes (and thus the content hash) on platforms
    # that translate line endings.
    with open(song_path, 'rb') as f:
        song_bytes = f.read()
        song_hash = utils.hash_string(song_bytes)
    url = get_url(ENQUEUE) + "/" + song_hash
    try:
        r = urllib2.urlopen(url)
    except Exception:
        print "Error in Enqueue Song"
    else:
        master_response = utils.unserialize_response(r.read())
        if master_response['success'] == True:
            print song_path + ' has been enqueued'
        else:
            print song_path + ' cannot be enqueued'
        print master_response['client_req_id']
Example #16
0
 async def post(self, request, current_user):
     """Send a mailing to a queried set of users.

     ``request.json['mail']`` carries the template (``subject``, ``text``,
     ``per_user``, ``email_type``); ``request.json['recipients']`` carries
     the user-query ``conditions`` and a display ``name``.

     Per-user mails get template substitution ({name}, absence links, ...);
     bulk mails are sent in batches of 50 recipients.
     """
     data = request.json
     mail_data = data['mail']
     users = await Users.get_by_many_field_value(
         **data['recipients']['conditions'])
     if mail_data['per_user']:
         for user in users:
             if mail_data['email_type'] == 'EmailTooLate':
                 # Mark the user as rejected-for-lateness before mailing.
                 user.confirmation = 'rej_time'
                 await user.update()
                 email_data = {"name": user.name}
             else:
                 # Signed absence link: the hash ties the link to this
                 # user so the endpoint can trust the answer.
                 link = 'https://{}/api/event/absence/{}/{}/'.format(
                     request.host, str(user.id),
                     hash_string(user.name + str(user.id) + user.email))
                 email_data = {
                     "link_yes": link + 'yes',
                     "link_no": link + 'no',
                     "name": user.name,
                     "what_can_you_bring": user.what_can_you_bring
                 }
             subject = mail_data['subject']
             text = mail_data['text'].format(**email_data)
             await send_email(recipients=[user.email],
                              text=text,
                              subject=subject)
             await asyncio.sleep(0.03)  # throttle sends
     else:
         subject = mail_data['subject']
         text = mail_data['text']
         # BUG FIX: range(len(users) // 50) silently dropped the final
         # partial batch (e.g. with 120 users only 100 were mailed);
         # round the batch count up instead.
         for x in range((len(users) + 49) // 50):
             await send_email(
                 recipients=[u.email for u in users[x * 50:(x + 1) * 50]],
                 text=text,
                 subject=subject)
             await asyncio.sleep(0.03)
     return json({
         'success':
         True,
         'msg':
         "Send: {} e-mail to {}".format(len(users),
                                        data['recipients']['name'])
     })
Example #17
0
 def use_existing(string_key,
                  serialized_format,
                  hash_method="sha256",
                  delimiter=':',
                  text_encoding="utf-8"):
     """Rebuild crypto data from its serialized form and unlock it.

     Deserializes ``serialized_format``, verifies that ``string_key`` is
     the key originally used (``check_string_key`` raises when it is not),
     and re-derives the encryption hash from the key and the stored salt.

     Returns:
         The verified, ready-to-use crypto-data object.
     """
     data = HashBasedCryptoDataBidirSerializer.deserialize(
         serialized_format,
         delimiter=delimiter,
         text_encoding=text_encoding)
     # Raises if the key does not reproduce the stored storage hash.
     data.check_string_key(string_key,
                           hash_method=hash_method,
                           text_encoding=text_encoding)
     # Key verified: derive the encryption hash with the stored salt.
     data.encryption_hash, _ = utils.hash_string(
         string_key,
         salt=data.encryption_hash_salt,
         hash_method=hash_method,
         text_encoding=text_encoding)
     # Guards the programmer against overwriting the deserialized IV by
     # calling encrypt before decrypt.
     data.iv_is_from_deserializer = True
     return data
Example #18
0
    def enqueue_song(self, params):
        """Master-side enqueue: append the song locally, persist playlist
        state, replicate the command, then report success to the client.
        """
        song_hash = params['song_hash']
        self.load_song(params)
        self._playlist_queue.append(song_hash)
        # Persist playlist + current song so a restarted master can recover.
        with open(PLAYLIST_STATE_FILE, 'w') as f:
            data = utils.format_playlist_state(self._playlist_queue,
                                               self._current_song)
            f.write(data)
        hashed_post_playlist = utils.hash_string(
            pickle.dumps(self._playlist_queue))

        # Replicas compare this post-state hash to detect divergence.
        rpc_data = {'current_song': self._current_song, \
                    'hashed_post_playlist': hashed_post_playlist, \
                    'time': time.time() }

        # Retry with backoff until enough replicas acknowledge.
        self.exponential_backoff(rpc_data, ENQUEUE, \
                                 ENQUEUE_URL + '/' + song_hash, \
                                 REPLICA_ACK_TIMEOUT)
        self._status_queue.put(utils.format_client_response(\
                                   True, ENQUEUE, {}, \
                                   client_req_id=self._client_req_id))
Example #19
0
def enqueue(song_file):
    """Append ``song_file`` to the local playlist.

    NOTE(review): the early ``return 'songthing'`` makes everything after
    it dead code -- it looks like leftover debug scaffolding. The
    unreachable remainder would hash the song file and forward the enqueue
    to the master over HTTP.
    """
    global playlist
    playlist.append(song_file)
    return 'songthing'
    # --- dead code below: never executed because of the return above ---
    song_path = MUSIC_DIR + song_file
    load_song(song_path)
    assert(os.path.exists(song_path))
    with open(song_path, 'r') as f:
        song_bytes = f.read()
        song_hash = utils.hash_string(song_bytes)
    url = get_url(ENQUEUE) + "/" + song_hash
    try: 
        r = urllib2.urlopen(url)
    except Exception:
        print "Error in Enqueue Song"
    else:
        master_response = utils.unserialize_response(r.read())
        if master_response['success'] == True:
            print song_path + ' has been enqueued'
            playlist.append(song_file)
        else:
            print song_path + ' cannot be enqueued'
        print master_response['client_req_id']
    return 'enqueue'
Example #20
0
def enqueue(song_file):
    """Append ``song_file`` to the local playlist.

    NOTE(review): the early ``return 'songthing'`` makes everything after
    it dead code -- it looks like leftover debug scaffolding. The
    unreachable remainder would hash the song file and forward the enqueue
    to the master over HTTP.
    """
    global playlist
    playlist.append(song_file)
    return 'songthing'
    # --- dead code below: never executed because of the return above ---
    song_path = MUSIC_DIR + song_file
    load_song(song_path)
    assert (os.path.exists(song_path))
    with open(song_path, 'r') as f:
        song_bytes = f.read()
        song_hash = utils.hash_string(song_bytes)
    url = get_url(ENQUEUE) + "/" + song_hash
    try:
        r = urllib2.urlopen(url)
    except Exception:
        print "Error in Enqueue Song"
    else:
        master_response = utils.unserialize_response(r.read())
        if master_response['success'] == True:
            print song_path + ' has been enqueued'
            playlist.append(song_file)
        else:
            print song_path + ' cannot be enqueued'
        print master_response['client_req_id']
    return 'enqueue'
Example #21
0
    def get_block(self, block_hash):
        """Return the block in ``self.blocks`` whose stringified hash
        equals ``block_hash``, or ``None`` when no block matches."""
        matches = (candidate for candidate in self.blocks
                   if hash_string(str(candidate)) == block_hash)
        return next(matches, None)
Example #22
0
def main(args):

    ####################
    # Arguments
    gpu = args.gpu
    model_name = args.model
    initial_tree_sampling = args.initial_tree_sampling
    path_config = args.config
    data_augmentation = args.data_augmentation
    trial_name = args.name
    actiontype = args.actiontype
    max_epoch = args.max_epoch
    dev_size = args.dev_size

    # Check
    assert actiontype in ["train", "evaluate"]
    if actiontype == "train":
        assert max_epoch > 0
    assert len(initial_tree_sampling.split("_")) == 3
    for type_ in initial_tree_sampling.split("_"):
        assert type_ in ["X", "BU", "TD", "RB", "LB", "RB2"]
    assert initial_tree_sampling.split("_")[2] != "X"
    assert initial_tree_sampling.split("_")[1] != "RB2"
    assert initial_tree_sampling.split("_")[2] != "RB2"

    if trial_name is None or trial_name == "None":
        trial_name = utils.get_current_time()

    ####################
    # Path setting
    config = utils.Config(path_config)

    basename = "%s.%s.%s.aug_%s.%s" \
            % (model_name,
               initial_tree_sampling,
               utils.get_basename_without_ext(path_config),
               data_augmentation,
               trial_name)

    if actiontype == "train":
        path_log = os.path.join(config.getpath("results"),
                                basename + ".training.log")
    elif actiontype == "evaluate":
        path_log = os.path.join(config.getpath("results"),
                                basename + ".evaluation.log")
    path_train = os.path.join(config.getpath("results"),
                              basename + ".training.jsonl")
    path_valid = os.path.join(config.getpath("results"),
                              basename + ".validation.jsonl")
    path_snapshot = os.path.join(config.getpath("results"),
                                 basename + ".model")
    path_pred = os.path.join(config.getpath("results"),
                             basename + ".evaluation.ctrees")
    path_eval = os.path.join(config.getpath("results"),
                             basename + ".evaluation.json")

    utils.set_logger(path_log)

    ####################
    # Random seed
    random_seed = trial_name
    random_seed = utils.hash_string(random_seed)
    random.seed(random_seed)
    np.random.seed(random_seed)
    cuda.cupy.random.seed(random_seed)

    ####################
    # Log so far
    utils.writelog("gpu=%d" % gpu)
    utils.writelog("model_name=%s" % model_name)
    utils.writelog("initial_tree_sampling=%s" % initial_tree_sampling)
    utils.writelog("path_config=%s" % path_config)
    utils.writelog("data_augmentation=%s" % data_augmentation)
    utils.writelog("trial_name=%s" % trial_name)
    utils.writelog("actiontype=%s" % actiontype)
    utils.writelog("max_epoch=%s" % max_epoch)
    utils.writelog("dev_size=%s" % dev_size)

    utils.writelog("path_log=%s" % path_log)
    utils.writelog("path_train=%s" % path_train)
    utils.writelog("path_valid=%s" % path_valid)
    utils.writelog("path_snapshot=%s" % path_snapshot)
    utils.writelog("path_pred=%s" % path_pred)
    utils.writelog("path_eval=%s" % path_eval)

    utils.writelog("random_seed=%d" % random_seed)

    ####################
    # Data preparation
    begin_time = time.time()

    train_dataset = dataloader.read_rstdt("train",
                                          relation_level="coarse-grained",
                                          with_root=False)
    test_dataset = dataloader.read_rstdt("test",
                                         relation_level="coarse-grained",
                                         with_root=False)
    vocab_word = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab", "words.vocab.txt"))
    vocab_postag = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "postags.vocab.txt"))
    vocab_deprel = utils.read_vocab(
        os.path.join(config.getpath("data"), "rstdt-vocab",
                     "deprels.vocab.txt"))

    if data_augmentation:
        external_train_dataset = dataloader.read_ptbwsj_wo_rstdt(
            with_root=False)
        # Remove documents with only one leaf node
        external_train_dataset = utils.filter_dataset(
            external_train_dataset,
            condition=lambda data: len(data.edu_ids) > 1)

    end_time = time.time()
    utils.writelog("Loaded the corpus. %f [sec.]" % (end_time - begin_time))

    ####################
    # Hyper parameters
    word_dim = config.getint("word_dim")
    postag_dim = config.getint("postag_dim")
    deprel_dim = config.getint("deprel_dim")
    lstm_dim = config.getint("lstm_dim")
    mlp_dim = config.getint("mlp_dim")
    n_init_epochs = config.getint("n_init_epochs")
    negative_size = config.getint("negative_size")
    batch_size = config.getint("batch_size")
    weight_decay = config.getfloat("weight_decay")
    gradient_clipping = config.getfloat("gradient_clipping")
    optimizer_name = config.getstr("optimizer_name")

    utils.writelog("word_dim=%d" % word_dim)
    utils.writelog("postag_dim=%d" % postag_dim)
    utils.writelog("deprel_dim=%d" % deprel_dim)
    utils.writelog("lstm_dim=%d" % lstm_dim)
    utils.writelog("mlp_dim=%d" % mlp_dim)
    utils.writelog("n_init_epochs=%d" % n_init_epochs)
    utils.writelog("negative_size=%d" % negative_size)
    utils.writelog("batch_size=%d" % batch_size)
    utils.writelog("weight_decay=%f" % weight_decay)
    utils.writelog("gradient_clipping=%f" % gradient_clipping)
    utils.writelog("optimizer_name=%s" % optimizer_name)

    ####################
    # Model preparation
    cuda.get_device(gpu).use()

    # Initialize a model
    utils.mkdir(os.path.join(config.getpath("data"), "caches"))
    path_embed = config.getpath("pretrained_word_embeddings")
    path_caches = os.path.join(
        config.getpath("data"), "caches",
        "cached." + os.path.basename(path_embed) + ".npy")
    if os.path.exists(path_caches):
        utils.writelog("Loading cached word embeddings ...")
        initialW = np.load(path_caches)
    else:
        initialW = utils.read_word_embedding_matrix(path=path_embed,
                                                    dim=word_dim,
                                                    vocab=vocab_word,
                                                    scale=0.0)
        np.save(path_caches, initialW)

    if model_name == "spanbasedmodel":
        # Span-based model w/ template features
        template_feature_extractor = models.TemplateFeatureExtractor(
            dataset=train_dataset)
        utils.writelog("Template feature size=%d" %
                       template_feature_extractor.feature_size)
        if actiontype == "train":
            for template in template_feature_extractor.templates:
                dim = template_feature_extractor.template2dim[template]
                utils.writelog("Template feature #%s %s" % (dim, template))
        model = models.SpanBasedModel(
            vocab_word=vocab_word,
            vocab_postag=vocab_postag,
            vocab_deprel=vocab_deprel,
            word_dim=word_dim,
            postag_dim=postag_dim,
            deprel_dim=deprel_dim,
            lstm_dim=lstm_dim,
            mlp_dim=mlp_dim,
            initialW=initialW,
            template_feature_extractor=template_feature_extractor)
    elif model_name == "spanbasedmodel2":
        # Span-based model w/o template features
        model = models.SpanBasedModel2(vocab_word=vocab_word,
                                       vocab_postag=vocab_postag,
                                       vocab_deprel=vocab_deprel,
                                       word_dim=word_dim,
                                       postag_dim=postag_dim,
                                       deprel_dim=deprel_dim,
                                       lstm_dim=lstm_dim,
                                       mlp_dim=mlp_dim,
                                       initialW=initialW)
    else:
        raise ValueError("Invalid model_name=%s" % model_name)
    utils.writelog("Initialized the model ``%s''" % model_name)

    # Load pre-trained parameters
    if actiontype != "train":
        serializers.load_npz(path_snapshot, model)
        utils.writelog("Loaded trained parameters from %s" % path_snapshot)

    model.to_gpu(gpu)

    ####################
    # Decoder preparation
    decoder = decoders.IncrementalCKYDecoder()

    ####################
    # Initializer preparation
    sampler = treesamplers.TreeSampler(initial_tree_sampling.split("_"))

    ####################
    # Training / evaluation
    if actiontype == "train":
        with chainer.using_config("train", True):
            if dev_size > 0:
                # Training with cross validation
                train_dataset, dev_dataset = utils.split_dataset(
                    dataset=train_dataset, n_dev=dev_size, seed=None)
                with open(
                        os.path.join(config.getpath("results"),
                                     basename + ".valid_gold.ctrees"),
                        "w") as f:
                    for data in dev_dataset:
                        f.write("%s\n" % " ".join(data.nary_sexp))
            else:
                # Training with the full training set
                dev_dataset = None

            if data_augmentation:
                train_dataset = np.concatenate(
                    [train_dataset, external_train_dataset], axis=0)

            train(model=model,
                  decoder=decoder,
                  sampler=sampler,
                  max_epoch=max_epoch,
                  n_init_epochs=n_init_epochs,
                  negative_size=negative_size,
                  batch_size=batch_size,
                  weight_decay=weight_decay,
                  gradient_clipping=gradient_clipping,
                  optimizer_name=optimizer_name,
                  train_dataset=train_dataset,
                  dev_dataset=dev_dataset,
                  path_train=path_train,
                  path_valid=path_valid,
                  path_snapshot=path_snapshot,
                  path_pred=os.path.join(config.getpath("results"),
                                         basename + ".valid_pred.ctrees"),
                  path_gold=os.path.join(config.getpath("results"),
                                         basename + ".valid_gold.ctrees"))

    elif actiontype == "evaluate":
        with chainer.using_config("train", False), chainer.no_backprop_mode():
            # Test
            parse(model=model,
                  decoder=decoder,
                  dataset=test_dataset,
                  path_pred=path_pred)
            scores = metrics.rst_parseval(
                pred_path=path_pred,
                gold_path=os.path.join(config.getpath("data"), "rstdt", "wsj",
                                       "test", "gold.labeled.nary.ctrees"))
            old_scores = metrics.old_rst_parseval(
                pred_path=path_pred,
                gold_path=os.path.join(config.getpath("data"), "rstdt", "wsj",
                                       "test", "gold.labeled.nary.ctrees"))
            out = {
                "Morey2018": {
                    "Unlabeled Precision": scores["S"]["Precision"] * 100.0,
                    "Precision_info": scores["S"]["Precision_info"],
                    "Unlabeled Recall": scores["S"]["Recall"] * 100.0,
                    "Recall_info": scores["S"]["Recall_info"],
                    "Micro F1": scores["S"]["Micro F1"] * 100.0
                },
                "Marcu2000": {
                    "Unlabeled Precision":
                    old_scores["S"]["Precision"] * 100.0,
                    "Precision_info": old_scores["S"]["Precision_info"],
                    "Unlabeled Recall": old_scores["S"]["Recall"] * 100.0,
                    "Recall_info": old_scores["S"]["Recall_info"],
                    "Micro F1": old_scores["S"]["Micro F1"] * 100.0
                }
            }
            utils.write_json(path_eval, out)
            utils.writelog(utils.pretty_format_dict(out))

    utils.writelog("Done: %s" % basename)
  def election_reconnect_or_fail(self):
    """Handle loss of the master: snapshot playlist state, report failure to
    the election service, try to win the election ourselves, and otherwise
    either acknowledge a newly elected master or drop into recovery mode.
    """
    # first notify election service that we are eligible to cast votes
    # after 0.5 seconds if master hasnt been elected, try to get votes ourselves
    # finally, if we fail to get votes after 1 second, assume we are partitioned
    # and go into failure mode
    # Snapshot the current playlist state to disk so it survives this process
    # (and can seed a new master).
    my_time = time.time()
    print "writing playlist state to file: " + str(my_time)
    with open(PLAYLIST_STATE_FILE, 'w') as f:
        data = utils.format_playlist_state(self._parent._playlist_queue, self._parent._current_song, \
                                            self._parent._master_term, self._parent._master_timestamp)
        f.write(data)
    print "notifying election service of failure"
    
    # Fire-and-forget RPC: tell the local vote service that the master failed.
    replica_url = 'http://' + self._parent._ip + ":" + VOTE_PORT + FAIL_URL
    r = RPC(self, VOTE, url=replica_url, ip=self._parent._ip, data={})
    r.start()
    
    time.sleep(0.5)
    # NOTE(review): MICROSECONDS presumably scales time.time() (seconds) into
    # the units of _last_hb_ts — confirm against where _last_hb_ts is set.
    if (time.time()*MICROSECONDS - self._parent._last_hb_ts) > self._timeout_threshold:
        # No fresh heartbeat: no one became master in time, so request votes
        # ourselves via the shared state queue.
        print "failed to elect new master, requesting votes"
        queue_hash = utils.hash_string(pickle.dumps(self._parent._playlist_queue))
        status_update_dict = { "request_votes" : True, "queue_hash" : queue_hash, \
            "current_song": self._parent._current_song, "term":self._parent._master_term,\
            "timestamp" : self._parent._master_timestamp}
        self._parent._state_queue.put(status_update_dict)
        print "waiting for response"
        # Poll for the vote outcome: up to 4 attempts x 0.3 s blocking gets.
        for i in range(0, 4):
            try:
                resp = self._parent._response_queue.get(True, 0.3)
                print "got response: " + str(resp)
                if "success" in resp:
                    # Won the election: promote ourselves by launching the
                    # master process with an incremented term.
                    self.get_new_timeout_threshold()
                    my_time = time.time()
                    print "won election, starting master process : " + str(my_time) 
                    os.system("./master.py -r " + str(self._parent._master_term + 1))
                    return
                else:
                    continue
            except Queue.Empty:
                continue
    else:
        # A heartbeat arrived while we slept — a master exists, so retract
        # the earlier failure notification.
        replica_url = 'http://' + self._parent._ip + ":" + VOTE_PORT + UNFAIL_URL
        r = RPC(self, VOTE, url=replica_url, ip=self._parent._ip, data={})
        r.start()
    
    # wait for others to try to get elected again
    # (20 iterations x 0.25 s = up to ~5 s of waiting for a new heartbeat)
    for i in range (0, 20):
        time.sleep(0.25)        
        # check last time for new master
        if (time.time()*MICROSECONDS - self._parent._last_hb_ts) <= self._timeout_threshold:
            my_time = time.time()
            print "someone won election: " + str(my_time)
            # Retract our failure report and ask the parent to reset
            # election bookkeeping.
            replica_url = 'http://' + self._parent._ip + ":" + VOTE_PORT + UNFAIL_URL
            r = RPC(self, VOTE, url=replica_url, ip=self._parent._ip, data={})
            r.start()
            status_update_dict = { "reset_election" : True }
            self._parent._state_queue.put(status_update_dict)
            return
    
    # if we get here, we should assume we have failed and enter failure mode
    print "FAILED: going into recovery mode"
    self.get_new_timeout_threshold()
    self._parent._in_recovery = True
    pygame.mixer.music.stop()
Example #24
0
def test_hash_string():
    """hash_string('test') must yield the canonical SHA-256 hex digest."""
    digest = hash_string('test')
    assert digest == (
        "9f86d081884c7d659a2feaa0c55ad015"
        "a3bf4f1b2b0b822cd15d6c15b0f00a08"
    )
Example #25
0
 async def create(self):
     """Hash the plaintext password in place, then persist via the parent."""
     hashed = hash_string(self.password)
     self.password = hashed
     return await super().create()
Example #26
0
 async def set_password(self, password):
     """Store the hash of *password* on the instance (never the plaintext)."""
     digest = hash_string(password)
     self.password = digest
Example #27
0
def main(args):
    """Evaluate a (non-learned) tree-sampling baseline on the RST-DT test set.

    Validates the ``tree_sampling`` spec, sets up result paths and logging,
    seeds RNGs from the trial name, loads the test data, samples trees with
    a TreeSampler, and writes RST-Parseval scores (both Morey-2018 and
    Marcu-2000 variants) to a JSON file.

    :param args: parsed CLI arguments with ``tree_sampling`` and ``name``.
    """

    ####################
    # Arguments
    tree_sampling = args.tree_sampling  # NOTE
    trial_name = args.name

    # Check
    # Spec is three underscore-separated sampler codes, e.g. "X_RB_TD".
    assert len(tree_sampling.split("_")) == 3
    for type_ in tree_sampling.split("_"):
        assert type_ in ["X", "BU", "TD", "RB", "LB", "RB2"]
    # "X" is disallowed in the last slot; "RB2" only allowed in the first.
    assert tree_sampling.split("_")[2] != "X"
    assert tree_sampling.split("_")[1] != "RB2"
    assert tree_sampling.split("_")[2] != "RB2"

    # Fall back to a timestamp-based trial name when none was given.
    if trial_name is None or trial_name == "None":
        trial_name = utils.get_current_time()

    ####################
    # Path setting
    config = utils.Config()

    basename = "%s.%s" \
            % (tree_sampling,
               trial_name)

    utils.mkdir(os.path.join(config.getpath("results"), "baselines"))
    path_log = os.path.join(config.getpath("results"), "baselines",
                            basename + ".evaluation.log")
    path_pred = os.path.join(config.getpath("results"), "baselines",
                             basename + ".evaluation.ctrees")
    path_eval = os.path.join(config.getpath("results"), "baselines",
                             basename + ".evaluation.json")

    utils.set_logger(path_log)

    ####################
    # Random seed
    # Derive a deterministic integer seed from the trial name so runs with
    # the same name are reproducible (seeds both NumPy and CuPy).
    random_seed = trial_name
    random_seed = utils.hash_string(random_seed)
    np.random.seed(random_seed)
    cuda.cupy.random.seed(random_seed)

    ####################
    # Log so far
    utils.writelog("tree_sampling=%s" % tree_sampling)
    utils.writelog("trial_name=%s" % trial_name)

    utils.writelog("path_log=%s" % path_log)
    utils.writelog("path_pred=%s" % path_pred)
    utils.writelog("path_eval=%s" % path_eval)

    utils.writelog("random_seed=%d" % random_seed)

    ####################
    # Data preparation
    begin_time = time.time()

    test_databatch = dataloader.read_rstdt("test",
                                           relation_level="coarse-grained",
                                           with_root=False)

    end_time = time.time()
    utils.writelog("Loaded the corpus. %f [sec.]" % (end_time - begin_time))

    ####################
    # Tree-sampler preparation
    sampler = treesamplers.TreeSampler(tree_sampling.split("_"))  # NOTE

    # No model here — the sampler itself produces the predicted trees;
    # chainer configs just ensure no training-mode behavior or backprop.
    with chainer.using_config("train", False), chainer.no_backprop_mode():
        parse(sampler=sampler, databatch=test_databatch, path_pred=path_pred)
        scores = rst_parseval.evaluate(
            pred_path=path_pred,
            gold_path=os.path.join(config.getpath("data"), "rstdt", "renamed",
                                   "test.labeled.nary.ctrees"))
        old_scores = old_rst_parseval.evaluate(
            pred_path=path_pred,
            gold_path=os.path.join(config.getpath("data"), "rstdt", "renamed",
                                   "test.labeled.nary.ctrees"))
        # Report unlabeled span ("S") metrics under both evaluation
        # conventions, scaled to percentages.
        out = {
            "Morey2018": {
                "Unlabeled Precision": scores["S"]["Precision"] * 100.0,
                "Precision_info": scores["S"]["Precision_info"],
                "Unlabeled Recall": scores["S"]["Recall"] * 100.0,
                "Recall_info": scores["S"]["Recall_info"],
                "Micro F1": scores["S"]["Micro F1"] * 100.0
            },
            "Marcu2000": {
                "Unlabeled Precision": old_scores["S"]["Precision"] * 100.0,
                "Precision_info": old_scores["S"]["Precision_info"],
                "Unlabeled Recall": old_scores["S"]["Recall"] * 100.0,
                "Recall_info": old_scores["S"]["Recall_info"],
                "Micro F1": old_scores["S"]["Micro F1"] * 100.0
            }
        }
        utils.write_json(path_eval, out)
        utils.writelog(utils.pretty_format_dict(out))

    utils.writelog("Done.")