Example #1
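The examples on this page are excerpts from a single module and omit its imports. They use the standard library's time, random, and csv modules, an Erdos-Renyi graph generator, and project helpers (linear_greedy, baseline, better_baseline, iterative_dfs, generate, generate_hash, Clustering, GCPA, GCPA_better) defined elsewhere in that project. A minimal sketch of the assumed imports follows; the igraph import is a guess based on the Graph.Erdos_Renyi call, not something shown in the original code.

import csv
import random
import time

# Graph.Erdos_Renyi(n=..., p=...) matches the python-igraph API, so the
# examples are assumed to rely on:
from igraph import Graph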
def rt_query_process(query,
                     clustering,
                     gcpa_data,
                     machines,
                     dataunit_in_machine,
                     ctype='fast'):
    query = set(query)
    start = time.time()
    if ctype == 'fast':
        cluster_index = clustering.insert_rt_fast(query)
    elif ctype == 'full':
        cluster_index = clustering.insert_rt_noupdate(query)
    else:
        # unrecognized ctype: fall back to a plain greedy cover below
        cluster_index = -1

    if cluster_index == -1:
        # the query matched no cluster, so answer it with plain greedy
        return linear_greedy(query, machines,
                             dataunit_in_machine)[0], time.time() - start

    # precomputed part covers and part membership for the matched cluster
    parts_cover = gcpa_data.partcover_by_cluster[cluster_index]
    dataunit_in_parts = gcpa_data.partindex_by_cluster[cluster_index]

    unprocessed = set(query)
    last_greedy = set()
    cover = set()
    while len(unprocessed) > 0:
        x = unprocessed.pop()
        # Depending on how you write rt-gcpa, one of these if statements can be removed
        if x in dataunit_in_parts:
            # x belongs to a precomputed part: reuse that part's cover
            x_part = dataunit_in_parts[x]
            cover |= parts_cover[x_part]
            for machine in parts_cover[x_part]:
                unprocessed = unprocessed - machines[machine]
                last_greedy = last_greedy - machines[machine]
        else:
            # x was not precomputed: leave it for the final greedy pass
            last_greedy.add(x)


    # cover whatever the precomputed parts did not handle
    cover |= linear_greedy(last_greedy, machines, dataunit_in_machine)[0]
    dt = time.time() - start

    # sanity check: the returned cover must contain every queried data unit
    query_copy = set(query)
    for c in cover:
        query_copy = query_copy - machines[c]
    if len(query_copy) > 0:
        print 'NOT COVERED'

    return cover, dt
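Each example calls linear_greedy(query, machines, dataunit_in_machine) and expects a (cover, elapsed_time) pair, where cover is a set of machine indices that together hold every queried data unit. The actual implementation is not shown on this page; the sketch below, a plain greedy set cover written only to make the expected interface concrete, is an assumption rather than the project's code.

def linear_greedy_sketch(query, machines, dataunit_in_machine):
    # machines[m]: set of data units stored on machine m
    # dataunit_in_machine[d]: set of machines that store data unit d
    start = time.time()
    uncovered = set(query)
    cover = set()
    while uncovered:
        # only machines holding at least one uncovered unit are candidates
        candidates = set()
        for dataunit in uncovered:
            candidates |= dataunit_in_machine[dataunit]
        if not candidates:
            break  # a data unit is stored on no machine; nothing more to do
        # greedy step: pick the machine covering the most remaining units
        best = max(candidates, key=lambda m: len(machines[m] & uncovered))
        cover.add(best)
        uncovered -= machines[best]
    return cover, time.time() - start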
Example #2
    def process(self, machines, dataunit_in_machine):
        queues = self.queues
        datapart_in_query_lists = self.datapart_in_query_lists
        uppersets_list = self.uppersets_list
        nqueriesinclusters = self.nqueries_in_clusters

        # one cover set per query, grouped by cluster
        for index in xrange(len(queues)):
            self.covers[index] = [set() for i in xrange(nqueriesinclusters[index])]

        start = time.time()

        for queueindex, queue in enumerate(queues):
            dataunit_in_part = dict()
            part_cover = dict()

            covered = set()
            part_index = -1
            for datapartindex in xrange(len(queue)):
                part_index += 1
                # drop data units already covered by earlier parts of this queue
                datapart = queue[datapartindex] - covered

                if datapart:
                    # record which part each remaining data unit belongs to
                    # (machinesintersected is built here but not used further below)
                    machinesintersected = {}
                    for dataunit in datapart:
                        dataunit_in_part[dataunit] = part_index
                        for machine in dataunit_in_machine[dataunit]:
                            if machine in machinesintersected:
                                machinesintersected[machine].add(dataunit)
                            else:
                                machinesintersected[machine] = set([dataunit])

                    # cover this data part greedily
                    cover, dt = linear_greedy(datapart, machines, dataunit_in_machine)
                    part_cover[part_index] = cover

                    # merge the part's cover into every query that uses this part
                    for queryindex in datapart_in_query_lists[queueindex][datapartindex]:
                        self.covers[queueindex][queryindex] |= cover

                    # anything in the upper set that the chosen machines happen to
                    # hold is also covered, so later parts can skip those units
                    coveredinupperset = datapart.copy()
                    for dataunit in uppersets_list[queueindex][datapartindex] - datapart:
                        if dataunit_in_machine[dataunit] & cover:
                            coveredinupperset.add(dataunit)

                    covered |= coveredinupperset

            self.partindex_by_cluster.append(dataunit_in_part)
            self.partcover_by_cluster.append(part_cover)

        end = time.time()
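The driver functions below build their inputs with machines = generate(range(ndata), nmachines) and dataunit_in_machine = generate_hash(machines, ndata); neither helper is shown on this page. From how the results are used, machines[m] is the set of data units placed on machine m and dataunit_in_machine[d] is the set of machines holding data unit d. The sketch below is a plausible shape for these helpers; the random placement policy and the replication factor (possibly the otherwise unused r=3 parameter of full_realtime_comparisons) are assumptions.

def generate_sketch(dataunits, nmachines, replication=3):
    # place every data unit on `replication` distinct, randomly chosen machines
    machines = [set() for _ in xrange(nmachines)]
    for dataunit in dataunits:
        for m in random.sample(xrange(nmachines), replication):
            machines[m].add(dataunit)
    return machines

def generate_hash_sketch(machines, ndata):
    # invert the placement: data unit -> set of machines that hold it
    dataunit_in_machine = dict((d, set()) for d in xrange(ndata))
    for m, held in enumerate(machines):
        for dataunit in held:
            dataunit_in_machine[dataunit].add(m)
    return dataunit_in_machine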
def full_clustering_procedure_comparisons(ndata=100000,
                                          N=50000,
                                          nmachines=50,
                                          min_q_len=6,
                                          max_q_len=15,
                                          number_of_clusterings=1,
                                          queryfile=None,
                                          np=.995,
                                          delim=','):
    NoNodes = ndata

    for iteration in xrange(number_of_clusterings):
        print 'ITERATION: ', iteration

        #        np = .993
        p = np / NoNodes

        output = []
        if queryfile is None:
            # generate a random graph on NoNodes vertices with edge probability p
            g = Graph.Erdos_Renyi(n=NoNodes, p=p)
            print 'Graph generated'

            # each query is the DFS path from a random starting node; keep only
            # paths with at least min_q_len data units, until N queries exist
            output = []
            while len(output) < N:
                node = random.randint(0, NoNodes - 1)
                line = iterative_dfs(g, node, path=[])
                if len(line) >= min_q_len:
                    output.append(line)

            graphfile = 'n' + str(
                len(output) / 1000) + 'np' + str(np) + '_' + str(iteration)
            with open(graphfile + '.csv', 'wb') as f:
                w = csv.writer(f)
                for line in output:
                    w.writerow(line)

            print 'Queries generated', len(output)
        else:
            with open(queryfile + '.csv', 'rb') as f:
                r = csv.reader(f, delimiter=delim)
                for row in r:
                    output.append(map(int, row))
            print 'Queries imported'
            graphfile = queryfile

        infile = graphfile

        test_queries = output

        # use every generated/imported query
        N = len(test_queries)

        #    clusters, disjoint, cluster_tracker, data_added_count, data_in_nclusters = simple_entropy(test_queries)
        #    clusters, cl_entropies = simple_entropy(test_queries)
        clustering = Clustering(test_queries, notif='loud')
        clusters = clustering.clusters

        outfile = infile + '_output_test'

        print 'Clustered'

        with open(outfile + '.csv', 'wb') as f:
            #    f.write('Output from simpleROCK clustering algorithm \n')
            f.write(str(len(clusters)) + '\n')
            ctr = 1
            for c in clusters:
                f.write('-----------------------\n')
                f.write('Cluster ' + str(ctr) + '\n')
                f.write('# of Queries: ' + str(len(c)) + '\n')
                #print 1.0*c.min_query_len/len(c.span)
                #        f.write('Span: ' + str(c.span) + '\n')
                f.write(c.aligned_output())
                f.write('-----------------------\n')
                ctr += 1
        print 'Clusters written to file'

        machines = generate(range(ndata), nmachines)
        dataunit_in_machine = generate_hash(machines, ndata)
        gcpa_data = GCPA(clustering, ndata)
        start = time.time()
        gcpa_data.process(machines, dataunit_in_machine)
        cover_time = time.time() - start
        average = 1.0 * cover_time / len(test_queries)

        gcpa_better = GCPA_better(clustering, ndata)

        betterstart = time.time()
        gcpa_better.process(machines, dataunit_in_machine)
        better_dt = time.time() - betterstart
        better_average = 1.0 * better_dt / len(test_queries)

        lg_start = time.time()
        for query in test_queries:
            cover, dt = linear_greedy(query, machines, dataunit_in_machine)

        lg_dt = time.time() - lg_start
        lg_ave = 1.0 * lg_dt / len(test_queries)

        baseline_start = time.time()
        for query in test_queries:
            cover, dt = baseline(query, machines, dataunit_in_machine)
        baseline_dt = time.time() - baseline_start
        baseline_ave = 1.0 * baseline_dt / len(test_queries)

        b_baseline_start = time.time()
        for query in test_queries:
            cover, dt = better_baseline(query, machines, dataunit_in_machine)
        b_baseline_dt = time.time() - b_baseline_start
        b_baseline_ave = 1.0 * b_baseline_dt / len(test_queries)

        #        print average, better_average, lg_ave, baseline_ave, b_baseline_ave
        print baseline_ave, b_baseline_ave, lg_ave, average, better_average

        covers = gcpa_data.covers
        better_covers = gcpa_better.covers

        to_write = []
        total = 0
        for clusterind, coverset in enumerate(covers):
            for query_ind, cover in enumerate(coverset):
                if total % 1000 == 0:
                    print total
                total += 1
                query = clustering.clusters[clusterind][query_ind]

                gcpa_fast_lin = cover
                gcpa_fast_better = better_covers[clusterind][query_ind]

                lg_cover, lg_dt = linear_greedy(query, machines,
                                                dataunit_in_machine)
                baseline_cover, baseline_dt = baseline(query, machines,
                                                       dataunit_in_machine)
                b_baseline_cover, b_baseline_dt = better_baseline(
                    query, machines, dataunit_in_machine)
                #            to_write.append(map(len, [gcpa_fast_lin, gcpa_fast_better, lg_cover, baseline_cover, b_baseline_cover]))
                to_write.append(
                    map(len, [
                        baseline_cover, b_baseline_cover, lg_cover,
                        gcpa_fast_lin, gcpa_fast_better
                    ]))

        with open(infile + 'big_comparison.csv', 'wb') as f:
            w = csv.writer(f)
            w.writerow([
                'Baseline', 'Better Baseline', 'N-Greedy', 'GCPA_G', 'GCPA_DL'
            ])
            w.writerow([
                baseline_ave, b_baseline_ave, lg_ave, average, better_average
            ])
            for row in to_write:
                w.writerow(row)
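A hypothetical invocation of the benchmark above (the query file name is made up): with queryfile set, the function reads <queryfile>.csv instead of generating queries, then writes <queryfile>_output_test.csv with the cluster dump and <queryfile>big_comparison.csv with the per-query cover sizes and the average per-query time of each method.

# Hypothetical call; 'saved_queries' stands in for an existing saved_queries.csv.
full_clustering_procedure_comparisons(ndata=100000,
                                      nmachines=50,
                                      queryfile='saved_queries',
                                      delim=',')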
Example #7
def full_realtime_comparisons(precompute_fraction=.2,
                              nqueries=50000,
                              ndataunits=100000,
                              nmachines=50,
                              r=3,
                              np=.995,
                              min_q_len=6,
                              max_q_len=15,
                              ctype='fast',
                              gcpatype='better',
                              queryfile=None,
                              delim=','):
    queries = []

    if queryfile is None:
        g = Graph.Erdos_Renyi(n=ndataunits, p=np / ndataunits)
        q = 0
        while q < nqueries:
            node = random.randint(0, ndataunits - 1)
            line = iterative_dfs(g, node, path=[])
            if len(line) >= min_q_len:
                queries.append(line)
                q += 1

        graphfile = 'n' + str(
            len(queries) / 1000) + 'np' + str(np) + ctype + gcpatype + 'test'
        with open(graphfile + '.csv', 'wb') as f:
            w = csv.writer(f)
            for line in queries:
                w.writerow(line)

        print 'Queries generated', len(queries)
    else:
        with open(queryfile + '.csv', 'rb') as f:
            # a distinct name keeps the csv reader from shadowing the r parameter
            reader = csv.reader(f, delimiter=delim)
            for row in reader:
                queries.append(map(int, row))
        graphfile = queryfile

    infile = graphfile
    #    max_to_process = min(nqueries, len(queries))
    #    queries = queries[:max_to_process]

    pre_computed = queries[:int(precompute_fraction * len(queries))]
    machines = generate(range(ndataunits), nmachines)
    dataunit_in_machine = generate_hash(machines, ndataunits)

    clustering = Clustering(pre_computed, notif='loud')

    rt_queries = queries[len(pre_computed):]

    if gcpatype == 'linear':
        gcpa_data = GCPA(clustering, ndataunits)
    elif gcpatype == 'better':
        gcpa_data = GCPA_better(clustering, ndataunits)
    elif gcpatype == 'both':
        gcpa_linear = GCPA(clustering, ndataunits)
        gcpa_better = GCPA_better(clustering, ndataunits)
    else:
        raise ValueError('unknown gcpatype: %r' % gcpatype)

    if gcpatype != 'both':
        gcpa_data.process(machines, dataunit_in_machine)
    else:
        gcpa_linear.process(machines, dataunit_in_machine)
        gcpa_better.process(machines, dataunit_in_machine)

    gcpa_rt_coverlens = []
    gcpa_times = []

    lg_coverlens = []
    baseline_coverlens = []
    baseline_times = []

    b_baseline_coverlens = []
    b_baseline_times = []

    lg_times = []
    for idx, query in enumerate(rt_queries):
        if idx % 1000 == 0:
            print 'Query: ', idx

        if ctype != 'both':
            cover, gcpa_dt = rt_query_process(query, clustering, gcpa_data,
                                              machines, dataunit_in_machine,
                                              ctype)
            gcpa_rt_coverlens.append(len(cover))
            gcpa_times.append(gcpa_dt)
        else:
            # ctype == 'both' assumes gcpatype == 'both' (it needs gcpa_linear
            # and gcpa_better built above)
            cover_fast, gcpa_fast_dt = rt_query_process(
                query, clustering, gcpa_linear, machines, dataunit_in_machine,
                'fast')
            cover_full, gcpa_full_dt = rt_query_process(
                query, clustering, gcpa_linear, machines, dataunit_in_machine,
                'full')
            cover_better_fast, gcpa_better_fast_dt = rt_query_process(
                query, clustering, gcpa_better, machines, dataunit_in_machine,
                'fast')
            cover_better_full, gcpa_better_full_dt = rt_query_process(
                query, clustering, gcpa_better, machines, dataunit_in_machine,
                'full')
            gcpa_rt_coverlens.append(
                map(len, [
                    cover_fast, cover_full, cover_better_fast,
                    cover_better_full
                ]))
            gcpa_times.append([
                gcpa_fast_dt, gcpa_full_dt, gcpa_better_fast_dt,
                gcpa_better_full_dt
            ])

        lg_cover, lg_dt = linear_greedy(query, machines, dataunit_in_machine)

        lg_times.append(lg_dt)

        baseline_cover, baseline_time = baseline(query, machines,
                                                 dataunit_in_machine)

        lg_coverlens.append(len(lg_cover))
        baseline_coverlens.append(len(baseline_cover))
        baseline_times.append(baseline_time)

        b_baseline_cover, b_baseline_time = better_baseline(
            query, machines, dataunit_in_machine)
        b_baseline_coverlens.append(len(b_baseline_cover))
        b_baseline_times.append(b_baseline_time)

    with open(infile + '_cover_len_comparison.csv', 'wb') as f:
        w = csv.writer(f)
        if ctype != 'both':
            w.writerow(['GCPA', 'Greedy', 'Baseline', 'Better Baseline'])
            for idx, cl in enumerate(gcpa_rt_coverlens):
                w.writerow([
                    cl, lg_coverlens[idx], baseline_coverlens[idx],
                    b_baseline_coverlens[idx]
                ])
        else:
            w.writerow([
                'GCPA_G_A', 'GCPA_G_U', 'GCPA_DL_A', 'GCPA_DL_U', 'Greedy',
                'Baseline', 'Better Baseline'
            ])
            for idx, cl in enumerate(gcpa_rt_coverlens):
                cl.extend([
                    lg_coverlens[idx], baseline_coverlens[idx],
                    b_baseline_coverlens[idx]
                ])
                w.writerow(cl)

    with open(infile + '_time_comparison.csv', 'wb') as f:
        w = csv.writer(f)
        if ctype != 'both':
            w.writerow(['GCPA', 'Greedy', 'Baseline', 'Better Baseline'])
            for idx, gcpa_dt in enumerate(gcpa_times):
                w.writerow([
                    gcpa_dt, lg_times[idx], baseline_times[idx],
                    b_baseline_times[idx]
                ])
        else:
            w.writerow([
                'GCPA_G_A', 'GCPA_G_U', 'GCPA_DL_A', 'GCPA_DL_U', 'Greedy',
                'Baseline', 'Better Baseline'
            ])
            for idx, gcpa_dt in enumerate(gcpa_times):
                gcpa_dt.extend([
                    lg_times[idx], baseline_times[idx], b_baseline_times[idx]
                ])
                w.writerow(gcpa_dt)
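In the real-time driver above, the first precompute_fraction of the queries is clustered and preprocessed, and the remaining queries are answered one at a time. With ctype='both' and gcpatype='both' it times all four GCPA variants against the greedy and baseline covers, writing <graphfile>_cover_len_comparison.csv and <graphfile>_time_comparison.csv. A hypothetical call, with queries generated because queryfile is left as None:

# Hypothetical call; the Erdos-Renyi queries are generated since queryfile is None.
full_realtime_comparisons(precompute_fraction=.2,
                          nqueries=50000,
                          ndataunits=100000,
                          nmachines=50,
                          ctype='both',
                          gcpatype='both')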