def test_linear_probe_bad_size(self):
     """LinearProbe raises TypeError for each unusable size argument.

     The sizes tried are None, a string, a builtin function and a float.
     """
     invalid_sizes = (None, 'string', sum, 420.69)
     for bad_size in invalid_sizes:
         with self.assertRaises(TypeError):
             ht.LinearProbe(bad_size, hf.h_ascii)
Ejemplo n.º 2
0
 def test_linear_probing(self):
     """Store one key under three hash functions; check a hit and a miss."""
     tables = [
         hash_tables.LinearProbe(1000, hash_functions.h_ascii),
         hash_tables.LinearProbe(1000, hash_functions.h_rolling),
         hash_tables.LinearProbe(1000, hash_functions.h_myown),
     ]
     for table in tables:
         table.add('ABC', 30)
     # 'ABC' was stored in every table; 'DEF' was never added.
     for table in tables:
         self.assertEqual(table.search('ABC'), 30)
         self.assertEqual(table.search('DEF'), None)
Ejemplo n.º 3
0
    def test_linear_probe_full_random(self):
        """Fill a randomly sized table, then check one more insert fails.

        After as many inserts as the table size, the test expects a
        further ``insert`` to raise ``IndexError`` and ``search`` for the
        rejected key to return -1.
        """
        capacity = random.randint(1, 10000)
        table = hash_tables.LinearProbe(hash_functions.h_ascii, capacity)
        for number in range(capacity):
            table.insert(str(number), number)

        with self.assertRaises(IndexError):
            table.insert('full', 10)
        lookup = table.search('full')
        assert lookup == -1
Ejemplo n.º 4
0
 def test_linear_probing(self):
     """Round-trip a single key through a LinearProbe using h_sedgwicks."""
     table = ht.LinearProbe(500, hf.h_sedgwicks)

     # add() is expected to report success with True
     added = table.add('key', 'value')
     self.assertEqual(added, True)

     # a stored key yields its value
     found = table.search('key')
     self.assertEqual(found, 'value')

     # a key that was never added yields None
     missing = table.search('wrong_key')
     self.assertEqual(missing, None)
    def test_linear_probe_bad_function_name(self):
        """A table given a string instead of a hash function stores nothing.

        ``add`` is expected to return a falsy result and a later
        ``search`` for the same key to return None.
        """
        table = hash_tables.LinearProbe(100, "not function")
        alphabet = s.ascii_uppercase + s.digits

        # Random 100-character key and value.
        random_key = ''.join(r.choices(alphabet, k=100))
        random_value = ''.join(r.choices(alphabet, k=100))

        add_result = table.add(random_key, random_value)
        self.assertFalse(add_result)
        self.assertEqual(table.search(random_key), None)
Ejemplo n.º 6
0
    def test_linear_probing(self):
        """Check hit, wrong-value and miss lookups for three hash functions.

        The same scenario runs in turn for h_ascii, h_rolling and h_DJB.
        """
        for hasher_name in ('h_ascii', 'h_rolling', 'h_DJB'):
            # Resolve the function lazily, one table at a time.
            hasher = getattr(hash_functions, hasher_name)
            table = hash_tables.LinearProbe(1000, hasher)
            table.add('test_key', 'test_value')

            stored = table.search('test_key')
            self.assertEqual(stored, 'test_value')
            self.assertNotEqual(table.search('test_key'), 'bad')
            self.assertEqual(table.search('bad_key'), None)
Ejemplo n.º 7
0
 def test_linear_probe_add_search_random(self):
     """Insert a random lowercase key and read its random value back."""
     table = hash_tables.LinearProbe(hash_functions.h_ascii, 100)
     # 1 to 10 characters drawn from code points 97-122 ('a' to 'z').
     key = ''.join(chr(random.randint(97, 122))
                   for _ in range(random.randint(1, 10)))
     value = random.randint(0, 10000)
     inserted = table.insert(key, value)
     assert inserted is True
     found = table.search(key)
     assert found == value
Ejemplo n.º 8
0
    def test_linearprobe_h_ascii_single_element(self):
        """Store one random key in a one-slot table and read it back.

        The key is 1 to 50 characters with code points 32-126; the value
        is a random integer from 0 to 999.
        """
        table = ht.LinearProbe(1, ht.h_ascii)
        key_length = random.randint(1, 50)
        expected = random.randint(0, 999)
        key = "".join(
            chr(random.randint(32, 126)) for _ in range(key_length))

        table.add(key, expected)

        self.assertEqual(expected, table.search(key))
    def test_linear_probe_rehashing(self):
        """Add twice as many entries as the initial size and find them all.

        Every ``add`` must report success, and afterwards each key must
        map to the value most recently stored for it.
        """
        initial_size = 1000
        table = hash_tables.LinearProbe(initial_size, hf.h_ascii)
        alphabet = s.ascii_uppercase + s.digits

        expected = {}
        for _ in range(initial_size * 2):
            new_key = ''.join(r.choices(alphabet, k=100))
            new_value = ''.join(r.choices(alphabet, k=100))
            expected[new_key] = new_value
            self.assertTrue(table.add(new_key, new_value))

        for stored_key in expected:
            self.assertEqual(table.search(stored_key), expected[stored_key])
    def test_linear_probe_nonexistent_key(self):
        """A half-filled table returns None for a key that was never added."""
        table_size = 100
        table = hash_tables.LinearProbe(table_size, hf.h_ascii)
        alphabet = s.ascii_uppercase + s.digits

        # Fill half the slots with random 100-character keys and values.
        inserted = {}
        for _ in range(table_size // 2):
            new_key = ''.join(r.choices(alphabet, k=100))
            new_value = ''.join(r.choices(alphabet, k=100))
            inserted[new_key] = new_value
            self.assertTrue(table.add(new_key, new_value))

        absent_key = "This is a key that is very unlikely to be generated"
        self.assertEqual(table.search(absent_key), None)
 def test_linear_probe_ascii_variable_add_search(self):
     """Add one random key to a randomly sized table and look it up.

     Repeated 100 times. Each round builds a table of 1 to 100 slots,
     adds a key of 1 to 100 ASCII letters with a random integer value,
     then checks that the (key, value) pair sits in the slot h_ascii
     selects and that ``search`` returns the value.
     """
     letters = string.ascii_lowercase + string.ascii_uppercase
     for _ in range(100):
         test_length = rdm.randint(1, 100)
         # randint must be called: the original stored the function
         # object itself as the "value" instead of a random number.
         test_value = rdm.randint(1, 1000)
         test_key = ''.join(
             rdm.choice(letters) for _ in range(rdm.randint(1, 100)))
         test_table = ht.LinearProbe(test_length, hf.h_ascii)
         test_table.add(test_key, test_value)
         self.assertEqual((test_key, test_value),
                          test_table.T[hf.h_ascii(test_key, test_length)])
         self.assertEqual(test_value, test_table.search(test_key))
 def test_linear_probe_rolling_collision(self):
     """Add the same key twice and check where the second copy lands.

     Runs 100 rounds with a random table size. ``search`` must return
     the first value, and the second pair must occupy the slot right
     after the key's h_rolling slot, wrapping to slot 0 when that slot
     is the last one.
     """
     key = 'teststring'
     for _ in range(100):
         size = rdm.randint(2, 1000)
         first_value = rdm.randint(1, 1000)
         second_value = rdm.randint(1, 1000)
         table = ht.LinearProbe(size, hf.h_rolling)
         table.add(key, first_value)
         table.add(key, second_value)
         self.assertEqual(first_value, table.search(key))

         home_slot = hf.h_rolling(key, size)
         if table.N - 1 == home_slot:
             next_slot = 0
         else:
             next_slot = home_slot + 1
         self.assertEqual((key, second_value), table.T[next_slot])
def time_unsorted(arg, unsorted_data):
    """Time the insertion of ``unsorted_data`` into a data structure.

    Args:
        arg: ``'hash'`` to insert into a ``LinearProbe`` hash table with
            100000 slots, or ``'tree'`` to insert through ``bt.insert``.
        unsorted_data: sequence of rows; ``row[0]`` is the key and
            ``row[1]`` the value. For ``'tree'`` the key is converted
            with ``int()``.

    Returns:
        Elapsed insertion time in seconds, as a float.

    Raises:
        ValueError: if ``arg`` is neither ``'hash'`` nor ``'tree'``
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if arg not in ('hash', 'tree'):
        raise ValueError(
            "arg must be 'hash' or 'tree', got " + repr(arg))

    if arg == 'hash':
        table = hash_tables.LinearProbe(100000, hash_functions.h_rolling)
        # perf_counter is monotonic, so system clock adjustments cannot
        # distort the measured interval.
        start = time.perf_counter()
        for row in unsorted_data:
            table.add(row[0], row[1])
        return time.perf_counter() - start

    # NOTE(review): the result of bt.insert is discarded and root stays
    # None for every call; if bt.insert returns the new root, the tree
    # never grows. Confirm against the bt module.
    root = None
    start = time.perf_counter()
    for row in unsorted_data:
        bt.insert(root, int(row[0]), row[1])
    return time.perf_counter() - start
Ejemplo n.º 14
0
    def test_linearprobe_h_rolling_multiple_elements(self):
        """Add up to 500 distinct random keys and read every one back.

        Keys are 1 to 50 characters with code points 32-126; values are
        random integers from 0 to 100. Filling stops early if ``add``
        returns -1.
        """
        tablesize = 1000
        table = ht.LinearProbe(tablesize, ht.h_rolling)
        tabledict = {}

        for _ in range(500):
            randomval = random.randint(0, 100)
            randkey = "".join(
                chr(random.randint(32, 126))
                for _ in range(random.randint(1, 50)))
            if randkey in tabledict:
                continue
            # Add each key exactly once. The original called add() in
            # the condition and again in the else branch, inserting
            # every key twice.
            if table.add(randkey, randomval) == -1:
                break
            tabledict[randkey] = randomval

        for key, expected in tabledict.items():
            self.assertEqual(expected, table.search(key))
Ejemplo n.º 15
0
 def test_search_bad_value(self):
     """Searching for a key that was never added returns None."""
     table = ht.LinearProbe(50, hf.h_ascii)
     table.add('text', 'value')
     missing = table.search('nothere')
     self.assertEqual(missing, None)
Ejemplo n.º 16
0
 def test_linear_probe_replace_key(self):
     """Inserting a key twice keeps ``capacity`` at 1 and the newer value."""
     table = hash_tables.LinearProbe(hash_functions.h_ascii, 30)
     for replacement in (10, 100):
         table.insert('ayo', replacement)
     assert table.capacity == 1
     current = table.search('ayo')
     assert current == 100
Ejemplo n.º 17
0
 def test_linear_probe_key_not_in_table(self):
     """search() on a table with no entries returns -1."""
     empty_table = hash_tables.LinearProbe(hash_functions.h_ascii, 30)
     result = empty_table.search('not in table')
     assert result == -1
Ejemplo n.º 18
0
 def testLinearProbe_add_to_full_ascii(self):
     """add() returns a falsy result when every slot is already taken."""
     # The size starts at 1: randint(0, 100) could produce a zero-slot
     # table, which is a degenerate case rather than a full one and made
     # the outcome depend on the random draw.
     # NOTE(review): confirm how LinearProbe treats a size of 0.
     size = random.randint(1, 100)
     test = hash_tables.LinearProbe(size, hash_functions.h_ascii)
     # Overwrite the backing list so that no slot is free.
     test.T = [str(random.randint(0, 100)) for _ in range(test.N)]
     self.assertFalse(test.add('key', 10))
Ejemplo n.º 19
0
 def testLinearProbe_search_not_in_table_ascii(self):
     """search() is falsy for a key absent from a fully occupied table."""
     table = hash_tables.LinearProbe(10, hash_functions.h_ascii)
     # Replace the backing list with one random numeric string per slot.
     filler = []
     for _ in range(table.N):
         filler.append(str(random.randint(0, 100)))
     table.T = filler
     self.assertFalse(table.search('key'))
Ejemplo n.º 20
0
 def test_linear_probe_search_1(self):
     """A value inserted under a key is returned by search()."""
     probe = hash_tables.LinearProbe(hash_functions.h_ascii, 100)
     probe.insert('woah!', 1)
     found = probe.search('woah!')
     assert found == 1
Ejemplo n.º 21
0
 def test_linear_probe_add_empty(self):
     """insert() into a freshly created table returns True."""
     probe = hash_tables.LinearProbe(hash_functions.h_ascii, 100)
     outcome = probe.insert('woah!', 1)
     assert outcome is True
Ejemplo n.º 22
0
 def testLinearProbe_search_in_table_python(self):
     """search() finds a key in a table whose slots were set by hand."""
     table = hash_tables.LinearProbe(10, hash_functions.h_python)
     # Slot n holds the pair (str(n), 2 * n), so key '3' maps to 6.
     prefilled = []
     for slot in range(table.N):
         prefilled.append((str(slot), 2 * slot))
     table.T = prefilled
     self.assertEqual(table.search('3'), 6)
 def test_linear_probe_bad_fxn(self):
     """LinearProbe raises TypeError for each unusable hash function.

     The values tried are None, a string, an int and a float.
     """
     not_functions = (None, 'string', 5, 420.69)
     for bad_function in not_functions:
         with self.assertRaises(TypeError):
             ht.LinearProbe(5, bad_function)
Ejemplo n.º 24
0
 def test_no_overwrite(self):
     """Adding an existing key again leaves the first value in slot 3."""
     table = ht.LinearProbe(50, hf.h_ascii)
     for payload in ('value', 'newvalue'):
         table.add('text', payload)
     # Slot 3 is presumably where h_ascii places 'text' in 50 slots.
     stored_pair = table.T[3]
     self.assertEqual(stored_pair[1], 'value')
Ejemplo n.º 25
0
 def test_search_function(self):
     """search() returns the value that was added under the key."""
     table = ht.LinearProbe(50, hf.h_ascii)
     table.add('text', 'value')
     found = table.search('text')
     self.assertEqual(found, 'value')
Ejemplo n.º 26
0
 def testLinearProbe_add_to_empty_ascii(self):
     """add() reports success on a freshly created, randomly sized table."""
     # At least one slot is required for the add to succeed. The
     # original randint(0, 100) could draw a size of 0, which made the
     # test fail intermittently.
     size = random.randint(1, 100)
     test = hash_tables.LinearProbe(size, hash_functions.h_ascii)
     self.assertTrue(test.add('key', 10))
 def test_linear_probe_search_key_none(self):
     """search(None) returns None."""
     table = ht.LinearProbe(5, hf.h_ascii)
     result = table.search(None)
     self.assertEqual(None, result)
 def test_linear_probe_add_key_none(self):
     """add() with a None key returns None."""
     table = ht.LinearProbe(5, hf.h_ascii)
     result = table.add(None, 420)
     self.assertEqual(None, result)
Ejemplo n.º 29
0
 def test_add_function(self):
     """add() places the value in slot 3 of a 50-slot h_ascii table."""
     table = ht.LinearProbe(50, hf.h_ascii)
     table.add('text', 'value')
     # Slot 3 is presumably where h_ascii places 'text' in 50 slots.
     stored_pair = table.T[3]
     self.assertEqual(stored_pair[1], 'value')
def _read_sample_info(path):
    """Read the tab-separated sample attributes file.

    Returns a ``(header, rows)`` pair: ``header`` is the list of column
    names from the first line (``None`` when the file is empty) and
    ``rows`` holds one list of fields per remaining line.
    """
    header = None
    rows = []
    # The context manager closes the file even when a line fails to decode.
    with open(path) as handle:
        for line in handle:
            fields = line.rstrip().split('\t')
            if header is None:
                header = fields
            else:
                rows.append(fields)
    return header, rows


def _load_gene_reads(path, gene_name, read_table, gene_name_col=1,
                     first_sample_col=2):
    """Store the read counts of ``gene_name`` in ``read_table``.

    The gzip file is read as text: line 1 is a version line, line 2 holds
    integer dimensions, line 3 is the tab-separated column header and
    every later line describes one gene. For the first line whose
    ``gene_name_col`` field equals ``gene_name``, every column from
    ``first_sample_col`` onward is added as ``header name -> field``.

    Raises:
        ValueError: if the dimension line contains a non-integer token.
    """
    with gzip.open(path, 'rt') as handle:
        next(handle, None)  # version line, not used
        for token in next(handle, '').split():
            int(token)  # dimensions are only validated, never used
        header = next(handle, '').rstrip().split('\t')

        for line in handle:
            fields = line.rstrip().split('\t')
            if len(fields) <= gene_name_col:
                continue
            if fields[gene_name_col] != gene_name:
                continue
            # Pair every header name with the field in the same column.
            # The original sorted the header first and then indexed the
            # unsorted row with the sorted position, which attached
            # counts to the wrong sample IDs and skipped the last two
            # columns.
            for sample_id, count in zip(header[first_sample_col:],
                                        fields[first_sample_col:]):
                read_table.add(str(sample_id), count)
            break  # only the first matching gene line is needed


def _collect_group_counts(group_table, read_table, groups):
    """Return one list of integer read counts per entry of ``groups``.

    Every (group, sample id) entry of every bucket in ``group_table.T``
    is inspected on its own, so samples whose groups share a bucket are
    still credited to the right group. Samples without a stored read
    are skipped.
    """
    position = {name: idx for idx, name in enumerate(groups)}
    counts = [[] for _ in groups]
    for bucket in group_table.T:
        for entry in bucket:
            idx = position.get(entry[0])
            if idx is None:
                continue
            read = read_table.search(str(entry[1]))
            if read is not None:
                counts[idx].append(int(read))
    return counts


def main():
    """Plot a gene's expression per sample group as a boxplot.

    Command-line arguments (all required): ``--gene_reads`` (gzip file
    of gene reads), ``--sample_attributes`` (tab-separated sample
    attributes), ``--gene``, ``--group_type`` (attribute column used for
    grouping) and ``--output_file``.

    Sample IDs are grouped in a chained hash table keyed by the group
    column, the selected gene's reads are stored in a linear-probing
    hash table keyed by sample ID, and the per-group counts are handed
    to ``data_viz.boxplot``. An earlier version of this routine used
    parallel arrays and binary search instead of hash tables.

    Raises:
        SystemExit: if an input file cannot be read or a required column
            is missing from the sample attributes header.
    """
    # Argparse Defns
    parser = argparse.ArgumentParser(description='Plot gene expression for'
                                     ' tissue type and '
                                     'tissue group given a gene')

    parser.add_argument('--gene_reads',
                        type=str,
                        help='File containing gene reads',
                        required=True)

    parser.add_argument('--sample_attributes',
                        type=str,
                        help='File containing the sample attributes',
                        required=True)

    parser.add_argument('--gene',
                        type=str,
                        help='Name of the gene you wish to analyze',
                        required=True)

    parser.add_argument(
        '--group_type',
        type=str,
        help='Name of the group of samples you wish to analyze expression for',
        required=True)

    parser.add_argument('--output_file',
                        type=str,
                        help='Name of the file the boxplot will be saved to',
                        required=True)

    args = parser.parse_args()

    # Defines file names
    data_file_name = args.gene_reads
    sample_info_file_name = args.sample_attributes

    # Defines variable names
    sample_id_col_name = 'SAMPID'
    tissue_group_col_name = args.group_type
    gene_name = args.gene

    # samples holds one list of attributes per sample; info_header is the
    # parallel list of column names for those attribute lists.
    try:
        info_header, samples = _read_sample_info(sample_info_file_name)
    except (OSError, ValueError):
        raise SystemExit('Could not read sample info file')
    if info_header is None:
        # An empty file has no header to look columns up in.
        raise SystemExit('Could not read sample info file')

    N_samp = 100000
    N_groups = 1000

    # Initializes hash tables
    group_table = ht.ChainedHash(N_groups, hf.h_rolling)
    read_table = ht.LinearProbe(N_samp, hf.h_rolling)

    # stores the index of attributes for samples/info_header arrays
    tissue_group_col_idx = linear_search(tissue_group_col_name, info_header)
    sample_id_col_idx = linear_search(sample_id_col_name, info_header)

    # NOTE(review): linear_search appears to report a miss as -1 (the
    # earlier parallel-array version tested for that value). Used as an
    # index, -1 would silently select the last column, so stop instead.
    for col_name, col_idx in ((tissue_group_col_name, tissue_group_col_idx),
                              (sample_id_col_name, sample_id_col_idx)):
        if col_idx is None or col_idx < 0:
            parser.error('column ' + repr(col_name) + ' not found in '
                         + str(sample_info_file_name))

    # writes the first hash table: group name -> sample IDs
    last_needed_col = max(tissue_group_col_idx, sample_id_col_idx)
    try:
        for sample in samples:
            # rstrip() drops trailing empty fields, so a row can be
            # shorter than the header; such rows carry no usable value.
            if len(sample) <= last_needed_col:
                continue
            group_table.add(sample[tissue_group_col_idx],
                            sample[sample_id_col_idx])
    except ValueError:
        print('Could not assign Sample IDs')

    # writes the second hash table: sample ID -> read count for the gene
    try:
        _load_gene_reads(data_file_name, gene_name, read_table)
    except (OSError, EOFError, ValueError):
        raise SystemExit('Could not read data info file')

    # group_counts stores the gene counts of each group's samples at the
    # same index position as the group name in groups. Sorting makes the
    # plot order reproducible between runs.
    groups = sorted(set(group_table.keys))
    group_counts = _collect_group_counts(group_table, read_table, groups)

    data_viz.boxplot(group_counts, groups,
                     str(args.gene) + ' Expression of Tissue Group',
                     'Tissue Group = ' + str(args.group_type),
                     str(args.gene) + ' Counts', args.output_file)