Example #1
def main():
    global nDocuments, docIdList

    clusterSets = [8, 16, 32, 64]
    clusterTiming  = []
    clusterEntropy = []

    # preprocess init
    p.init()
    loadClassMasterMap()
    nDocuments = len(p.chiFeatureVectors)

    # 15000 documents took 742 s to run, i.e. about 12 minutes
    # nDocuments = 10000
    docIdList = random.sample(list(p.chiFeatureVectors.keys()), nDocuments)

    for n in clusterSets:
        p.nClusters = n
        start = time.time()
        startKMeans()
        end = time.time()

        print "\nK-Means Clustering for ", n, " clusters took ", end - start, " time"
        entropy = displayClusterCounts()

        clusterTiming.append(end-start)
        clusterEntropy.append(entropy)
        
        print(n, end - start, entropy)
        print('-------------------------------------------------------------------------------------\n')
    
    plotGraphs(clusterSets, clusterTiming, clusterEntropy)
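The loop above records one entropy value per cluster count via displayClusterCounts(), a helper defined elsewhere in the project. For reference, below is a minimal, self-contained sketch of the class-weighted clustering entropy such a helper would typically compute; the function name and its input format are illustrative assumptions, not the project's code.

# Hedged sketch (not the project's displayClusterCounts): class-weighted entropy of a
# clustering, the usual external-evaluation measure this snippet appears to collect.
# `cluster_to_class_counts` maps cluster id -> {true class label: document count}.
import math

def clustering_entropy(cluster_to_class_counts):
    total_docs = sum(sum(counts.values()) for counts in cluster_to_class_counts.values())
    entropy = 0.0
    for counts in cluster_to_class_counts.values():
        cluster_size = sum(counts.values())
        if cluster_size == 0:
            continue
        # entropy of the class distribution inside this cluster
        h = -sum((n / cluster_size) * math.log(n / cluster_size, 2)
                 for n in counts.values() if n > 0)
        entropy += (cluster_size / total_docs) * h  # weight by cluster size
    return entropy

# e.g. clustering_entropy({0: {"sport": 40, "tech": 10}, 1: {"tech": 50}})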
Example #2
def main():
    global isVisited, isClustered, neighbourCount, docList, sampledDocId
    p.init(False)
    docList = p.chiFeatureList

    sampledDocId = random.sample(range(21000), 20000)
    sampledDocList = []  # subset of docList selected by the sample
    for docId in sampledDocId:
        sampledDocList.append(docList[docId])

    # knnHistogram()
    # drawHistogram()

    isVisited = [0] * len(sampledDocList)
    isClustered = [0] * len(sampledDocList)
    neighbourCount = [0] * len(sampledDocList)
    print(len(sampledDocList))

    start = time.time()
    findCluster(8)

    # fileObj = open("cluster_result.txt", "w")
    # fileObj.write(str(isClustered))
    # fileObj.close()
    evaluate()
    end = time.time()
    print "Running time", end - start
Example #4
def main(argv):

	file_path = argv[1]
	num_questions = argv[2]

	extract_sentences = preprocess.init(file_path)

	questions = tag.main(extract_sentences, num_questions)
Example #5
def main(argv):

    file_path = argv[1]
    num_questions = argv[2]

    extract_sentences = preprocess.init(file_path)

    questions = ask_sys.init(extract_sentences)

    for question in questions:
        print(question)
Example #7
def main(csv, feature):
    """
    Draw a bar chart that ranks the importance of feature combinations in order.

    Parameters
    ----------
    csv: `str`
        Path to csv file that contains the features.
    feature: `str`
        Name of the feature(s) for the random forest classifier.

    Returns
    -------

    """
    # The order in this list should be the same as columns in features.csv
    # column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity",
    #                 "m4_1", "m4_2", "m4_3", "m4_4", "m4_5", "m4_6"]
    # features: "sepal_length", "sepal_width", "petal_length", "petal_width"
    column_names = ["NetworkType", "SubType"]
    column_names += list(feature)

    isSubType = True

    csv_file = csv

    # at_least filters out classes whose instance count is below this threshold.
    at_least = 0
    X, Y, sub_to_main_type, feature_order = init(csv_file, column_names,
                                                 isSubType, at_least)

    # number of iterations for the multi-class classification
    N = 1000

    # Valid methods are: "RandomOver", "RandomUnder", "SMOTE" and "None"
    sampling_method = "None"
    print("sampling_method: %s" % sampling_method)
    print("Number of instances: %d" % len(Y))

    Matrix, NetworkTypeLabels, sum_accuracy, list_important_features = \
        sum_confusion_matrix(X, Y, sub_to_main_type, feature_order, isSubType, sampling_method, N)

    average_matrix = np.asarray(
        list(map(lambda row: list(map(lambda e: e / N, row)), Matrix)))
    print("average accuracy: %f" % (float(sum_accuracy) / float(N)))
    plot_feature_importance(list_important_features, feature_order)
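sum_confusion_matrix and plot_feature_importance are project helpers not shown here. As a rough, hedged illustration of the underlying idea, the sketch below averages scikit-learn random-forest feature importances over several fits and ranks them; the helper name, the n_estimators value, and the data layout are assumptions, not the project's implementation.

# Hedged sketch: average random-forest importances over several fits and rank them.
# Assumes X is an (n_samples, n_features) array whose columns follow feature_order.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def average_importances(X, Y, feature_order, n_runs=10):
    totals = np.zeros(len(feature_order))
    for _ in range(n_runs):
        clf = RandomForestClassifier(n_estimators=100)
        clf.fit(X, Y)                       # fresh fit per run, as in the N-iteration loop above
        totals += clf.feature_importances_  # one importance score per column of X
    mean_importances = totals / n_runs
    # highest mean importance first
    return sorted(zip(feature_order, mean_importances), key=lambda t: t[1], reverse=True)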
Example #8
def main():
    column_names = [
        "NetworkType", "SubType", "ClusteringCoefficient",
        "DegreeAssortativity", "m4_1", "m4_2", "m4_3", "m4_4", "m4_5", "m4_6"
    ]

    isSubType = True
    at_least = 1
    X, Y, sub_to_main_type, feature_order = init("features.csv", column_names,
                                                 isSubType, at_least)
    N = 100

    c1_name = "ER Network"
    c2_name = "PeerToPeer"
    # c2 to c1
    c1_X, c1_Y, c2_X, c2_Y = separator(X, Y, c1_name, c2_name)
    plot(c1_X, c2_X, feature_order, c1_name, c2_name)
Example #9
def main():
    column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity", "m4_1", "m4_2", "m4_3",
                    "m4_4", "m4_5", "m4_6"]

    isSubType = True
    at_least = 1
    X, Y, sub_to_main_type, feature_order = init("features.csv", column_names, isSubType, at_least)
    N = 100

    # synthesized to real
    real_X, real_Y, synthesized_X, synthesized_Y = separator(X, Y)
    bp_tuple_L = base_to_predict(synthesized_X, synthesized_Y, real_X, real_Y)
    Ys, accum_dic = make_layers(bp_tuple_L, Synthesized)
    plot_accumulation(Ys, accum_dic)

    # real to synthesized
    bp_tuple_L = base_to_predict(*separator(X, Y))
    Ys, accum_dic = make_layers(bp_tuple_L, Y)
    plot_accumulation(Ys, accum_dic)
Example #10
def main(csv, features, iter):
    # -f degree -f betweenness -f closeness -f eigencentrality -f coreness -f layerness -f pagerank -f sum_friends_friends -f transitivity

    column_names = ["NetworkType", "SubType"] + list(features)
    isSubType = True  # use SubType as the labels for classification
    at_least = 0

    X, Y, sub_to_main_type, feature_order = init(csv, column_names, isSubType, at_least)

    N = iter

    # network subtype one is interested in
    one = "seed"

    X_converted, Y_converted = convert_one_to_many(X, Y, one)

    list_accuracies, list_important_features, list_auc = many_classifications(
        X_converted, Y_converted, feature_order, N
    )

    print("average accuracy: %f" % (float(sum(list_accuracies)) / float(N)))
    print("average AUC: %f" % (float(sum(list_auc)) / float(N)))

    dominant_features = plot_feature_importance(list_important_features, feature_order)

    first = dominant_features[0][0][0]
    second = dominant_features[1][0][0]
    if first == second:
        second = dominant_features[1][1][0]

    Y_converted_string_labels = [one if y == 1 else "non-seed" for y in Y_converted]

    x_label = first
    y_label = second
    x_index = feature_order.index(x_label)
    y_index = feature_order.index(y_label)

    plot_2d(np.array(X_converted), np.array(Y_converted_string_labels), x_index, y_index, x_label, y_label)
Example #11
opt['pretrained_words'] = True
opt['vocab_size'] = embedding.size(0)
opt['embedding_dim'] = embedding.size(1)
opt['pos_size'] = len(meta['vocab_tag'])
opt['ner_size'] = len(meta['vocab_ent'])
opt['cuda'] = args.cuda
opt['classes'] = {'Y': 0, 'N': 1}
opt['id_classes'] = {0: 'Yes', 1: 'No'}
opt['interact'] = True
BatchGen.pos_size = opt['pos_size']
BatchGen.ner_size = opt['ner_size']
model = LegalQAClassifier(opt, embedding, state_dict)
w2id = {w: i for i, w in enumerate(meta['vocab'])}
tag2id = {w: i for i, w in enumerate(meta['vocab_tag'])}
ent2id = {w: i for i, w in enumerate(meta['vocab_ent'])}
init()


def interact_entailment(article, query):
    annotated = annotate(('interact-entailment', article, query, 'Y'))
    model_in = to_id(annotated, w2id, tag2id, ent2id)
    model_in = next(
        iter(
            BatchGen([model_in],
                     opt,
                     batch_size=1,
                     gpu=args.cuda,
                     evaluation=True)))
    prediction, values = model.interact_(model_in)

    max_probs = torch.max(prediction, 1)[1].item()
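The fragment ends with the argmax index of the entailment prediction. The lines below are a tiny self-contained illustration (with made-up values) of how that index maps back to a human-readable answer through the id_classes table set above; they are not part of the original snippet.

id_classes = {0: 'Yes', 1: 'No'}    # mirrors opt['id_classes'] from the snippet above
predicted_index = 1                 # e.g. the value of torch.max(prediction, 1)[1].item()
print(id_classes[predicted_index])  # -> No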
Example #12
from Routenetwork import Routenetwork
from preprocess import init, distanceList
from Astar import Astar
from Latlong import ShowNodesOnMap

data = init()
nodes = data[0]
edges = data[1]
nodedict = data[2]

g = Routenetwork()
for item in edges:
    g.addEdge(item)

g_edges = g.getEdges()
distances = distanceList(nodes, nodedict, g_edges)
for items in nodes:
    distances[(items, items)] = 0
start = 'Birla Institute of Technology Hyderabad'
end = 'RGIA'

path = Astar(g, nodedict, distances, start, end)

print('Final Route :')
print('****Start****')
for item in path:
    print(item)
print('****End****')
ShowNodesOnMap(path, nodedict)
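distanceList (imported from preprocess) supplies the (node, node) -> distance lookup that Astar consumes. As a hedged illustration only, the sketch below shows how such a distance could be derived from latitude/longitude pairs with the haversine formula; the function name and constants are assumptions, not the project's implementation.

# Hedged sketch: great-circle distance in kilometres between two lat/long points.
# This is only one plausible way a helper like distanceList could measure distance.
from math import radians, sin, cos, asin, sqrt

def haversine_km(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))
    a = sin((lat2 - lat1) / 2) ** 2 + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2
    return 2 * 6371.0 * asin(sqrt(a))  # mean Earth radius, ~6371 km

# e.g. distance between two illustrative coordinate pairs
print(haversine_km(17.55, 78.57, 17.24, 78.43))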
Example #13
    import mapreads
    import validate
    import multialign
    import findorfs
    import findreps
    import abundance
    import annotate
    import fannotate
    import scaffold
    import findscforfs
    import propagate
    import classify
    import postprocess

    # initialize submodules
    preprocess.init(readlibs, asmcontigs, skipsteps, selected_programs["assemble"], run_fastqc, selected_programs["preprocess"])
    assemble.init(readlibs, skipsteps, selected_programs["assemble"], asmcontigs, userKmerSupplied == False)
    mapreads.init(readlibs, skipsteps, selected_programs["mapreads"], savebtidx, ctgbpcov, lowmem)
    validate.init(readlibs, skipsteps, selected_programs["validate"], asmScores)
    findorfs.init(readlibs, skipsteps, selected_programs["findorfs"], min_ctg_len, min_ctg_cvg, read_orfs)
    findreps.init(readlibs, skipsteps)
    multialign.init(readlibs, skipsteps, forcesteps, selected_programs["multialign"], refgenomes)
    annotate.init(readlibs, skipsteps, selected_programs["annotate"], nofcpblast)
    fannotate.init(skipsteps)
    abundance.init(readlibs, skipsteps, forcesteps, selected_programs["annotate"])
    scaffold.init(readlibs, skipsteps, retainBank)
    findscforfs.init(readlibs, skipsteps, selected_programs["findorfs"])
    propagate.init(readlibs, skipsteps, selected_programs["annotate"])
    classify.init(readlibs, skipsteps, selected_programs["annotate"], lowmem, 0 if not isolate_genome else 100)
    postprocess.init(readlibs, skipsteps, selected_programs["annotate"])
    generic.init(skipsteps, readlibs)
Example #14
    import preprocess
    import assemble
    import mapreads
    import findorfs
    import findreps
    import abundance
    import annotate
    import fannotate
    import scaffold
    import findscforfs
    import propagate
    import classify
    import postprocess

    # initialize submodules
    preprocess.init(readlibs, skipsteps, selected_programs["assemble"], run_fastqc, filter)
    assemble.init(readlibs, skipsteps, selected_programs["assemble"], usecontigs)
    mapreads.init(readlibs, skipsteps, selected_programs["assemble"], selected_programs["mapreads"], savebtidx, ctgbpcov, lowmem)
    findorfs.init(readlibs, skipsteps, selected_programs["assemble"], selected_programs["findorfs"], min_ctg_len, min_ctg_cvg)
    findreps.init(readlibs, skipsteps)
    annotate.init(readlibs, skipsteps, selected_programs["classify"], nofcpblast)
    fannotate.init(skipsteps)
    abundance.init(readlibs, skipsteps, forcesteps, selected_programs["classify"])
    scaffold.init(readlibs, skipsteps, retainBank, selected_programs["assemble"])
    findscforfs.init(readlibs, skipsteps, selected_programs["findorfs"])
    propagate.init(readlibs, skipsteps, selected_programs["classify"])
    classify.init(readlibs, skipsteps, selected_programs["classify"])
    postprocess.init(readlibs, skipsteps, selected_programs["classify"])

    try:
       dlist = []
Example #15
    import multialign
    import findorfs
    import findreps
    import abundance
    import classify
    import classifyreads
    import fannotate
    import scaffold
    import findscforfs
    import propagate
    import bin
    import postprocess

    # initialize submodules
    preprocess.init(readlibs, asmcontigs, skipsteps,
                    selected_programs["assemble"], run_fastqc,
                    selected_programs["preprocess"])
    assemble.init(readlibs, skipsteps, selected_programs["assemble"],
                  asmcontigs, (userKmerSupplied == False and isolate_genome))
    mapreads.init(readlibs, skipsteps, selected_programs["mapreads"],
                  savebtidx, ctgbpcov, lowmem)
    benchmark.init(readlibs, skipsteps, availableRulers_dict["classifyreads"])
    validate.init(readlibs, skipsteps, selected_programs["validate"],
                  asmScores)
    findorfs.init(readlibs, skipsteps, selected_programs["findorfs"],
                  min_ctg_len, min_ctg_cvg, read_orfs)
    findreps.init(readlibs, skipsteps)
    multialign.init(readlibs, skipsteps, forcesteps,
                    selected_programs["multialign"], refgenomes)
    classify.init(readlibs, skipsteps, selected_programs["classify"],
                  nofcpblast)
Example #16
	_, frame = cap.read()

	# frame copy for initial processes
	initial_frame = frame
	final_frame = frame

	# Initially, assume that the grid has not been preprocessed
	gridProcessed = False


	# Preprocessing starts here ----------------------------------------------------------------------

	# finding initial corner points -------------------------------------------------------------

	# calling the init function to get the dilated frame
	initial_dil = pre.init(frame)

	# we get the corners of the sudoku grid. 
	# This is the first run, hence there is no angle correction
	initial_corner = pre.contourEndPoints(initial_dil)

	# proceed only if there are contours
	if initial_corner != 0:
		initial_corner = np.float32(initial_corner)

		# perspective transformation
		initial_pers_trans = cv2.getPerspectiveTransform(initial_corner, puzzle_corner)
		initial_puzzle_frame = cv2.warpPerspective(initial_frame, initial_pers_trans, (477,477))
		final_puzzle_frame = initial_puzzle_frame # initial value