def __init__(self, num_buckets, q, tsv_path):
        """Store the bucketization parameters, then load or build the data.

        Args:
            num_buckets: Number of buckets; kept on the instance and used as
                the numerator of the bucket factor.
            q: Key into the module-level ``q_probs`` table.
            tsv_path: Path of the tab-separated password file. It is handed to
                ``__get_fnames__`` and is only parsed when the stored interval
                files are not both present.
        """
        self._q = q
        self._num_buckets = num_buckets

        # Work out which stored files to look for.
        # NOTE(review): presumably this is what sets ``_file_intervals`` and
        # ``_file_interval_sizes`` read further down -- confirm.
        self.__get_fnames__(tsv_path)

        self._ngmodel = pwm.NGramPw(n=3, pwfilename=leak_pw_fname)

        # Module-level statistics copied onto the instance.
        self._count_n = NTH_FREQ
        self._freq_q = q_probs[self._q]
        self._norm_factor = NORM_FACTOR
        self._bucket_factor = self._num_buckets / self._freq_q

        # Reuse previously stored results only when both files are present.
        have_stored_files = (
            os.path.exists(self._file_intervals)
            and os.path.exists(self._file_interval_sizes)
        )
        if have_stored_files:
            self.__load_data__()
            return

        # Nothing stored yet: parse the raw TSV and build the tree from it.
        # NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and
        # removed in 2.0 (replacement: ``on_bad_lines='skip'``). It is left
        # untouched here so behavior on the currently targeted pandas version
        # does not change -- confirm which version that is.
        column_names = ['id', 'pw', 'freq', 'ng-prob', 'hash']
        raw_frame = pd.read_csv(
            tsv_path,
            sep='\t',
            header=None,
            names=column_names,
            engine='python',
            error_bad_lines=False,
            quoting=csv.QUOTE_NONE,
        )
        self.pw_data = raw_frame.set_index('id')
        self.__create_new_tree__()
# Example no. 2
# 0
import pwmodel
import sys
# Build the n-gram model from the RockYou password-count file.
# NOTE(review): ``limit`` presumably caps how many entries are read -- confirm
# against the pwmodel API.
pwm = pwmodel.NGramPw(
    '/home/rahul/passwords/rockyou-withcount.txt.gz',
    limit=1000000,
)

# The first command-line argument is passed as the number of passwords to
# generate; only candidates of length 6..30 (inclusive) pass the filter.
num_requested = int(sys.argv[1])
pws = pwm.generate_pws_in_order(
    num_requested,
    filter_func=lambda candidate: 6 <= len(candidate) <= 30,
)

# One generated entry per line, followed by summary figures.
print('\n'.join(map(str, pws)))
print("Total generated: {}".format(len(pws)))
# Each entry is unpacked as a pair; the second items are summed.
total_prob = sum(prob for _, prob in pws)
print("Total Prob: {}".format(total_prob))
print("p(123456)={}".format(pwm.prob('123456')))
# Example no. 3
# 0
# Dataset statistics used by the experiments below.
# NOTE(review): the zero values look like placeholders to be replaced with
# figures measured on the actual password data -- confirm before running.

# top N passwords included in every bucket
N = 1
# total count of passwords in data
TOTAL_F = 0
# 1 - histogram proportion of top N passwords
REM_HIST = 0
# 1 - n-gram proportion of top N passwords
REM_NGRAM = 0
# frequency of top \bar{q}th password
NTH_FREQ = 0
# Ratio of the two remainders above. The division is guarded because
# REM_NGRAM is 0 as shipped, and an unguarded division raised
# ZeroDivisionError as soon as the module was imported. 0.0 stands in until
# a non-zero REM_NGRAM is supplied.
NORM_FACTOR = REM_HIST / REM_NGRAM if REM_NGRAM else 0.0
# probabilities of top x passwords, for x=1, 10, 100, 1000
q_probs = {1: 0, 10: 0, 100: 0, 1000: 0}

# Module-level objects shared by the experiment code; both are constructed
# from the same leaked-password file (leak_pw_file) at import time.
# NOTE(review): Passwords is defined outside this view -- presumably a wrapper
# around the password/count data; confirm.
passwords = Passwords(leak_pw_file)
# n-gram password model built with n=3 over the same file.
ngmodel = pwm.NGramPw(n=3, pwfilename=leak_pw_file)


"""
Regular experiment with test cases from both previously compromised and uncompromised users. 
        hpb: True if doing hash prefix based bucketization, False if doing FSB
"""
def experiment(comp_sample_file, uncomp_sample_file, hpb=False):
    # read test samples from files
    comp_df, uncomp_df = sample_user_pws(comp_sample_file, uncomp_sample_file, num_trials, hpb)

    if comp_df != None:
        get_targeted_guesses(user_mul_pw_file, comp_df, predictions_files,hpb=hpb)

    if hpb:
        # add extra passwords from n-gram model