def test_hashing():
    """Check cryptography.hash_value against hand-computed HMAC-SHA256 digests.

    For each sample (converted to a byte string first):
      * pre_hashed=False must equal HMAC(salt, HMAC(public_salt, value)),
      * pre_hashed=True must equal HMAC(salt, value),
    where every HMAC is the SHA-256 hex digest.
    """
    def sha256_hmac_hex(key, message):
        # Hex digest of an HMAC-SHA256 over `message` keyed with `key`.
        return hmac.new(key, msg=message, digestmod=hashlib.sha256).hexdigest()

    samples = (
        "This is a test value to hash",
        u"Now a uni\u0107ode string",
        1231,
        datetime.date.today(),
    )
    for sample in samples:
        # Unicode text is encoded as UTF-8; everything else goes through str().
        payload = sample.encode("utf-8") if isinstance(sample, unicode) else str(sample)

        # Raw value: public-salt pass first, then the internal-salt pass.
        public_digest = sha256_hmac_hex(cryptography._get_public_salt(), payload)
        expected_from_raw = sha256_hmac_hex(cryptography._get_salt(), public_digest)
        assert expected_from_raw == cryptography.hash_value(payload, pre_hashed=False)

        # Pre-hashed value: only the internal-salt pass is applied.
        expected_from_hashed = sha256_hmac_hex(cryptography._get_salt(), payload)
        assert expected_from_hashed == cryptography.hash_value(payload, pre_hashed=True)
Exemple #2
0
def intake_user(api_type, api_version):
    """Create or update an IntakeUser from the submitted request form.

    The form (IntakeUserForm over request.form) is validated first; on
    failure the request is aborted with status 400 and a message listing
    each field's errors. Otherwise an existing IntakeUser is looked up by
    api_type, hashed user id and g.consumer.org, and the form values are
    handed to IntakeUser.create_or_update.

    Args:
        api_type: API type used for the lookup and passed on to
            IntakeUser.create_or_update.
        api_version: accepted but not used by this function.

    Returns:
        The jsonify(...) result carrying `user_id` (as submitted, not
        hashed) and `is_new` (True when no existing user was found).
    """
    form = IntakeUserForm(request.form)
    # TODO: Change this to validate_or_400 after writing intake tests
    #       to confirm it's proper behavior.
    if not form.validate():
        abort(
            400,
            "Request data validation failed with the following errors: \n%s" %
            "\n".join("%s - %s" % (field, ",".join(errors))
                      for field, errors in form.errors.items()))

    user_id = hash_value(form.user_id.data, pre_hashed=form._pre_hashed)
    existing_user = IntakeUser.query(IntakeUser.api_type == api_type,
                                     IntakeUser.user_id == user_id,
                                     IntakeUser.org == g.consumer.org).get()

    # Keep only the form fields that exist as attributes on IntakeUser.
    # Built as a new dict rather than deleting keys from a dict while
    # iterating over it, which is only safe when keys() returns a copy.
    user_fields = dict((field, value)
                       for field, value in form.data.items()
                       if hasattr(IntakeUser, field))

    # Called for its side effect; the returned model is not needed here.
    IntakeUser.create_or_update(user_fields,
                                g.consumer.key,
                                g.consumer.org,
                                api_type,
                                existing_user=existing_user,
                                pre_hashed=form._pre_hashed)

    return jsonify(user_id=form.user_id.data, is_new=(existing_user is None))
Exemple #3
0
def intake_user(api_type, api_version):
    """Create or update an IntakeUser from the submitted request form.

    The form (IntakeUserForm over request.form) is validated first; on
    failure the request is aborted with status 400 and a message listing
    each field's errors. Otherwise an existing IntakeUser is looked up by
    api_type, hashed user id and g.consumer.org, and the form values are
    handed to IntakeUser.create_or_update.

    Args:
        api_type: API type used for the lookup and passed on to
            IntakeUser.create_or_update.
        api_version: accepted but not used by this function.

    Returns:
        The jsonify(...) result carrying `user_id` (as submitted, not
        hashed) and `is_new` (True when no existing user was found).
    """
    form = IntakeUserForm(request.form)
    # TODO: Change this to validate_or_400 after writing intake tests
    #       to confirm it's proper behavior.
    if not form.validate():
        error_lines = "\n".join("%s - %s" % (field, ",".join(errors))
                                for field, errors in form.errors.items())
        abort(400,
              "Request data validation failed with the following errors: \n%s"
              % error_lines)

    user_id = hash_value(form.user_id.data, pre_hashed=form._pre_hashed)
    existing_user = IntakeUser.query(IntakeUser.api_type == api_type,
                                     IntakeUser.user_id == user_id,
                                     IntakeUser.org == g.consumer.org).get()

    # Keep only the form fields that exist as attributes on IntakeUser.
    # Built as a new dict rather than deleting keys from a dict while
    # iterating over it, which is only safe when keys() returns a copy.
    user_fields = dict((field, value)
                       for field, value in form.data.items()
                       if hasattr(IntakeUser, field))

    # Called for its side effect; the returned model is not needed here.
    IntakeUser.create_or_update(user_fields,
                                g.consumer.key,
                                g.consumer.org,
                                api_type,
                                existing_user=existing_user,
                                pre_hashed=form._pre_hashed)

    return jsonify(user_id=form.user_id.data, is_new=(existing_user is None))
Exemple #4
0
def _rehash_pii(intake_user_key):
    """Recompute a user's hashed attributes from their "_enc" counterparts.

    The entity is fetched with intake_user_key.get(). For every key of its
    to_dict() that ends in "_enc", the attribute named by that key without
    the suffix is set to hash_value(decrypted value), or to None when the
    encrypted value is falsy or decrypts to something falsy (strings are
    stripped before that check). The entity is then saved with put().

    Args:
        intake_user_key: object whose get() returns the entity to update.
    """
    suffix = "_enc"
    user = intake_user_key.get()

    for enc_name, ciphertext in user.to_dict().items():
        if not enc_name.endswith(suffix):
            continue

        rehashed = None
        if ciphertext:
            plaintext = decrypt_value(ciphertext)
            # Surrounding whitespace must not influence the hash.
            if isinstance(plaintext, (str, unicode)):
                plaintext = plaintext.strip()
            if plaintext:
                rehashed = hash_value(plaintext)

        setattr(user, enc_name[:-len(suffix)], rehashed)

    user.put()
def check_intakeuser(intake_user, user_data, developer_key, org_key,
                     pre_hashed=False):
    """Assert that a stored intake user matches the submitted data.

    Args:
        intake_user: the stored entity under test.
        user_data: mapping of field name -> submitted value. Values of
            keys starting with "date" are parsed from "%Y-%m-%d" strings
            into dates before comparison.
        developer_key: expected value of intake_user.developer.
        org_key: expected value of intake_user.org.
        pre_hashed: whether the data was submitted already hashed. If so,
            each "<field>_enc" attribute must be None; otherwise it must
            decrypt back to the submitted value.
    """
    # Fields compared as-is; every other key is expected to be stored hashed.
    plain_fields = ("date_joined", "date_banned", "reason_banned",
                    "review_count", "transaction_count",
                    "positive_review_percentage")

    eq_(intake_user.developer, developer_key)
    eq_(intake_user.org, org_key)

    for field, expected in user_data.items():
        if field.startswith("date"):
            expected = datetime.datetime.strptime(expected, "%Y-%m-%d").date()

        if field in plain_fields:
            eq_(getattr(intake_user, field), expected)
            continue

        stored_encrypted = getattr(intake_user, field + "_enc")
        if pre_hashed:
            eq_(stored_encrypted, None)
        else:
            eq_(cryptography.decrypt_value(stored_encrypted), expected)

        expected = cryptography.hash_value(expected, pre_hashed=pre_hashed)
        eq_(getattr(intake_user, field), expected)
Exemple #6
0
def _rehash_pii(intake_user_key):
    """Rebuild the hashed fields of one user from the encrypted copies.

    Loads the entity through intake_user_key.get(), walks the keys of its
    to_dict() that end in "_enc", and assigns the attribute with the same
    name minus that suffix: hash_value() of the decrypted value, or None
    if the encrypted value is falsy or the decrypted value (stripped when
    it is a string) is falsy. Finishes by calling put() on the entity.

    Args:
        intake_user_key: object whose get() returns the entity to update.
    """
    def rehash(ciphertext):
        # Returns the fresh hash for an encrypted value, or None if there
        # is nothing meaningful to hash.
        if not ciphertext:
            return None
        plaintext = decrypt_value(ciphertext)
        if isinstance(plaintext, (str, unicode)):
            plaintext = plaintext.strip()
        if not plaintext:
            return None
        return hash_value(plaintext)

    entity = intake_user_key.get()
    snapshot = entity.to_dict()
    for enc_field in snapshot.keys():
        if enc_field.endswith("_enc"):
            # Drop the four-character "_enc" suffix to get the hashed field.
            setattr(entity, enc_field[:-4], rehash(snapshot[enc_field]))

    entity.put()
Exemple #7
0
def find_users(pii_fields, api_type, org_key=None, pre_hashed=False):
    """
    Find the users matching each piece of the given PII, one field at a time.

    Every non-empty PII value is searched on its own with
    find_matching_users, producing one "user set" per search. The PII of
    the users found so far is aggregated (aggregate_pii); a field whose
    value already appears in that aggregate is not searched again.

    Args:
        pii_fields: A dictionary of pii_field -> value. Falsy values are
            ignored.
        api_type: Passed through to find_matching_users.
        org_key: Passed through to find_matching_users.
        pre_hashed: When True the values are used as given; otherwise each
            one is hashed with hash_value before searching.

    Returns:
        A list of user sets, one per search performed. Each user set is a
        list containing the values() of every match dictionary returned by
        find_matching_users (presumably one per level of matching
        directness).
    """
    lookup_values = dict(
        (name, raw if pre_hashed else hash_value(raw))
        for name, raw in pii_fields.items() if raw)

    seen_pii = defaultdict(set)
    found_sets = []
    for name, hashed in lookup_values.items():
        # Already covered by a previous user set: no need to search again.
        if hashed in seen_pii.get(name, ()):
            continue

        matches = find_matching_users({name: [hashed]},
                                      api_type,
                                      org_key=org_key)

        # Only the matched entries are kept; the dictionary keys are dropped.
        current_set = [level.values() for level in matches]
        found_sets.append(current_set)

        members = [miu.user for level in current_set for miu in level]
        for pii_name, pii_values in aggregate_pii(members).items():
            seen_pii[pii_name].update(pii_values)

    return found_sets
Exemple #8
0
def find_users(pii_fields, api_type, org_key=None, pre_hashed=False):
    """
    Search for users field by field and return one "user set" per search.

    Each non-empty PII value is looked up separately through
    find_matching_users. After every lookup the PII of the users found is
    merged (via aggregate_pii) into a running aggregate, and any later
    field whose value is already in that aggregate is skipped.

    Args:
        pii_fields: A dictionary of pii_field -> value. Falsy values are
            ignored.
        api_type: Passed through to find_matching_users.
        org_key: Passed through to find_matching_users.
        pre_hashed: When True the values are used as given; otherwise each
            one is hashed with hash_value before searching.

    Returns:
        A list of user sets, one per search performed. Each user set is a
        list containing the values() of every match dictionary returned by
        find_matching_users (presumably one per level of matching
        directness).
    """
    searchable = {}
    for pii_name, raw_value in pii_fields.items():
        if not raw_value:
            continue
        if pre_hashed:
            searchable[pii_name] = raw_value
        else:
            searchable[pii_name] = hash_value(raw_value)

    known_pii = defaultdict(set)
    results = []
    for pii_name, hashed_value in searchable.items():
        already_seen = (pii_name in known_pii
                        and hashed_value in known_pii[pii_name])
        if already_seen:
            # This value surfaced in an earlier user set; skip the search.
            continue

        matching = find_matching_users({pii_name: [hashed_value]},
                                       api_type,
                                       org_key=org_key)

        # Keep just the matched entries of each match dictionary.
        group = [match.values() for match in matching]
        results.append(group)

        group_users = [entry.user for tier in group for entry in tier]
        group_pii = aggregate_pii(group_users)
        for name, values in group_pii.items():
            known_pii[name].update(values)

    return results
Exemple #9
0
    def create_or_update(cls, fields, dev_key, org_key, api_type,
                         existing_user=None, pre_hashed=False):
        """
        Build a new IntakeUser, or update the given one, and save it.

        Note that this method DOES persist the resulting model (put()).

        Fields named 'user_id' or listed in PII_FIELDS are stored hashed
        (hash_value). Unless pre_hashed is set, an encrypted copy is also
        stored under "<field>_enc". String values for those fields that
        are empty or pure whitespace are skipped entirely. All other
        fields are stored unchanged.

        Args:
            fields: dictionary of field -> value
            dev_key: stored on the model as `developer`
            org_key: stored on the model as `org`
            api_type: stored on the model as `api_type`
            existing_user: model to update in place; when falsy a new
                IntakeUser() is created instead
            pre_hashed: bool indicating whether the PII has already been
                hashed; passed on to hash_value, and suppresses the
                encrypted copies

        Returns:
            The intake user model, after put() has been called on it.
        """
        intake_user = existing_user if existing_user else IntakeUser()

        intake_user.org = org_key
        intake_user.developer = dev_key
        intake_user.api_type = api_type

        # Collect everything first so attributes are only assigned once all
        # values have been hashed/encrypted successfully.
        pending = {}
        for name, raw in fields.items():
            if name not in (('user_id',) + PII_FIELDS):
                pending[name] = raw
                continue

            # Don't even save pure whitespace PII.
            if isinstance(raw, (str, unicode)) and not raw.strip():
                continue

            # An encrypted copy only makes sense for raw input; keeping an
            # encrypted version of an already-hashed value adds nothing.
            if not pre_hashed:
                pending[name + "_enc"] = encrypt_value(raw)
            pending[name] = hash_value(raw, pre_hashed=pre_hashed)

        for attribute, attribute_value in pending.items():
            setattr(intake_user, attribute, attribute_value)

        intake_user.put()

        return intake_user
Exemple #10
0
def test_submit():
    """End-to-end check of POSTing users to /api/v1/submit/user.

    Steps, in order:
      1. developer 1 submits test_data: reported as new, one IntakeUser
         exists;
      2. developer 2 submits the same data: also reported as new, two
         IntakeUsers exist;
      3. developer 1 resubmits with transaction_count and twitter_id
         changed: reported as not new;
      4. developer 2 submits the data with user_id "2": reported as new.
    After every step the stored entity is verified with check_intakeuser.
    """
    endpoint = "/api/v1/submit/user"
    signed_url = "http://localhost/api/v1/submit/user"

    def load_developer(consumer_key):
        # Returns the Developer with that consumer key plus its OAuth consumer.
        developer = models.Developer.query(
            models.Developer.consumer_key == consumer_key).get()
        consumer = oauth.Consumer(key=developer.consumer_key,
                                  secret=developer.consumer_secret)
        return developer, consumer

    def submit(consumer, payload):
        # Builds the request for `payload` on behalf of `consumer` and posts it.
        signed = create_request(consumer, signed_url, "POST",
                                urlencode(payload))
        return testapp.post(endpoint, signed.to_postdata())

    grant_submit()

    # Submit through developer 1
    dev1, consumer1 = load_developer("valid_key1")
    response = submit(consumer1, test_data)
    assert response.status_int == 200
    assert response.json["user_id"] == u"1"
    assert response.json["is_new"]

    assert models.IntakeUser.query().count() == 1
    stored = models.IntakeUser.query().get()
    check_intakeuser(stored, test_data, dev1.key, dev1.org)

    # Submit through developer 2
    dev2, consumer2 = load_developer("valid_key2")
    response = submit(consumer2, test_data)
    assert response.status_int == 200
    assert response.json["user_id"] == u"1"
    assert response.json["is_new"]

    assert models.IntakeUser.query().count() == 2
    stored = models.IntakeUser.query(
        models.IntakeUser.org == dev2.org).get()
    check_intakeuser(stored, test_data, dev2.key, dev2.org)

    # Submit an update through developer 1
    updated_data = dict(test_data)
    updated_data["transaction_count"] = 1056
    updated_data["twitter_id"] = "1234567"

    response = submit(consumer1, updated_data)
    assert response.status_int == 200
    assert response.json["user_id"] == u"1"
    assert not response.json["is_new"]

    stored = models.IntakeUser.query(
        models.IntakeUser.org == dev1.org).get()
    check_intakeuser(stored, updated_data, dev1.key, dev1.org)

    # Submit a second user through developer 2
    second_user_data = dict(test_data)
    second_user_data["user_id"] = u"2"

    response = submit(consumer2, second_user_data)
    assert response.status_int == 200
    assert response.json["user_id"] == u"2"
    assert response.json["is_new"]

    stored = models.IntakeUser.query(
        models.IntakeUser.user_id == cryptography.hash_value(
            second_user_data["user_id"]),
        models.IntakeUser.developer == dev2.key).get()
    check_intakeuser(stored, second_user_data, dev2.key, dev2.org)