コード例 #1
0
ファイル: main.py プロジェクト: albertpod/documentClustering
def main():
    """Cluster the prepared document vectors with k-means and write a report.

    The data is collected and vectorised through ``dataPreparation``, the
    number of clusters is taken from the length of
    ``dataPreparation.klusterNum``'s result, and ``cluster.kmeans`` does the
    clustering.  The precision is printed to stdout; the cluster membership
    is written to ``output.json`` as plain ``print`` lines (one
    ``Cluster:`` header followed by one ``Vector :`` line per member), so
    despite the extension the file is not JSON.
    """
    # Threshold that decides when to stop updating clusters
    # (i.e. the optimisation has 'converged').
    epsilon = 0.001

    printFlag = False

    myBag, collection = dataPreparation.dataCollect()
    myArray = dataPreparation.vectorization(myBag, collection, printFlag)

    # Only the size of klusterNum's result is needed: the number of clusters.
    numClusters = len(dataPreparation.klusterNum(myArray))

    # The context manager guarantees the report is flushed and closed, also
    # when clustering or writing raises (the handle used to be leaked).
    with open("output.json", "w") as outfile:
        print("Clustering started...\n")
        clusters = cluster.kmeans(myArray, numClusters, epsilon)
        print("Success\n")

        print("K-means precision = ", cluster.precision(clusters), "\n")

        # Dump every cluster followed by the topic/id of each of its vectors.
        for i, c in enumerate(clusters):
            print("Cluster: ", i, file=outfile)
            for p in c.vectors:
                print("Vector :", p['topic'], p['id'], file=outfile)
            print('\n', file=outfile)
コード例 #2
0
def run_transform(name, data_x, data_y, transformer):
    """Transform the data, cluster it and run the neural network on each view.

    While the work is running, ``sys.stdout`` is redirected to
    ``reports/<name>_nn_output.txt`` so everything printed by the helpers
    ends up in that report.

    Args:
        name: Label used in messages, chart titles and the report file name.
        data_x: Feature data handed to ``transformer`` as first argument.
        data_y: Target data, passed along unchanged to every step.
        transformer: Callable invoked as ``transformer(data_x, data_y, name)``
            that returns the transformed features.
    """
    print("Working on {}...".format(name))

    report_name = "reports/{}_nn_output.txt".format(name)
    # ``with`` closes the report file; ``finally`` restores stdout even when
    # one of the steps raises, so later output is not silently swallowed.
    with open(report_name, "w") as report:
        sys.stdout = report
        try:
            #2 transform the data
            transform_x = transformer(data_x, data_y, name)
            plot_corr(name, pd.DataFrame(data=transform_x), data_y)

            kmeans_name = "{} KMeans Clustered".format(name)
            gmm_name = "{} GMM Clustered".format(name)

            #3 cluster the transformed data
            kmeans_clustered = kmeans(kmeans_name, transform_x, data_y)
            gmm_clustered = gmm(gmm_name, transform_x, data_y)

            #4 run neural network on transformed data
            x_train, x_test, y_train, y_test = split_data(transform_x, data_y)
            run_nn(name, x_train, x_test, y_train, y_test)

            #5 call run_nn on the clusters from #3
            #  (clustered from the dimensionally reduced data)
            kmx_train, kmx_test, kmy_train, kmy_test = split_data(
                kmeans_clustered, data_y)
            run_nn(kmeans_name, kmx_train, kmx_test, kmy_train, kmy_test)

            gmmx_train, gmmx_test, gmmy_train, gmmy_test = split_data(
                gmm_clustered, data_y)
            run_nn(gmm_name, gmmx_train, gmmx_test, gmmy_train, gmmy_test)
        finally:
            sys.stdout = sys.__stdout__

    print("Finished {}!".format(name))
    print()
コード例 #3
0
ファイル: evaluate.py プロジェクト: RobinWu218/mcr2
def kmeans(args, train_features, train_labels, test_features, test_labels):
    """Run KMeans clustering on the training split.

    Thin wrapper that forwards ``args``, ``train_features`` and
    ``train_labels`` to ``cluster.kmeans`` and returns its result.
    ``test_features`` and ``test_labels`` are accepted but not used.

    Options:
        n (int): number of clusters used in KMeans (presumably read from
            ``args`` by ``cluster.kmeans`` -- not visible here).
    """
    outcome = cluster.kmeans(args, train_features, train_labels)
    return outcome
コード例 #4
0
ファイル: specific_tests.py プロジェクト: iHeartGraph/Graggle
def thresh_search(fname, nw=200, wl=10, w2v=None, ignore_list=None, top_n=None):
    """Sweep the threshold and record the mean k-means purity for each value.

    For every threshold ``t`` in ``0, 0.2, ..., 50.8`` the vectors are
    rebuilt with ``get_vectors`` and clustered with ``kmeans`` several times;
    the mean of the ``['purity']['Total']`` results is printed and appended
    to ``thresh_search.csv``.  Output of the helpers is silenced by
    temporarily replacing ``sys.stdout`` with an ``EmptyStdout`` instance.

    Args:
        fname: Input passed to ``get_vectors``.
        nw: Forwarded to ``get_vectors`` as ``nw``.
        wl: Forwarded to ``get_vectors`` as ``wl``.
        w2v: Forwarded as ``w2v_params``; ``None`` means an empty dict.
        ignore_list: Forwarded as ``ignore_list``; ``None`` means an empty
            list.
        top_n: Forwarded to ``get_vectors`` as ``top_n``.
    """
    # None sentinels instead of mutable defaults shared between calls.
    w2v = {} if w2v is None else w2v
    ignore_list = [] if ignore_list is None else ignore_list

    runs = 10

    og = sys.stdout
    be_quiet = EmptyStdout()

    # ``with`` closes the CSV even if a run raises half-way through.
    with open('thresh_search.csv', 'w+') as f:
        cw = csv.writer(f)
        cw.writerow(['Thresh', 'Purity'])

        for step in range(0, 255):
            t = step / 5
            purities = []

            sys.stdout = be_quiet
            try:
                for _ in range(runs):
                    X, y = get_vectors(fname,
                                       thresh=t,
                                       nw=nw,
                                       # was hard-coded to 10, which silently
                                       # ignored the caller's ``wl``
                                       wl=wl,
                                       w2v_params=w2v,
                                       top_n=top_n,
                                       ignore_list=ignore_list)

                    n = len(set(y)) + 1
                    purities.append(kmeans(X, y, n)['purity']['Total'])
            finally:
                # Never leave stdout muted, not even on an exception.
                sys.stdout = og

            # Average over the runs actually made (used to divide the sum of
            # 10 runs by 5, doubling the reported purity).
            purity = sum(purities) / len(purities)
            print('T:%f, Purity: %0.4f' % (t, purity))
            cw.writerow([t, purity])
コード例 #5
0
ファイル: analysis.py プロジェクト: BeagleLearning/beagleNLP
def customClusterQuestions(docs, algorithm, parameters, removeOutliers=True):
    """
    Cluster questions with a caller-selected algorithm.

    Implemented as a separate copy of the clustering entry point so that
    production builds are not disturbed.

    Parameters:
        docs (list): List of strings (questions)
        algorithm (str): Name of the clustering algorithm to use; one of
            "DBSCAN", "K Means", "Gaussian K Means",
            "Agglomerative Clustering"
        parameters (list): Entries indexable by "param" and "value".  Only
            the FIRST entry is consulted; it must carry the setting the
            chosen algorithm needs ("epsilon", "clusters", "strictness" or
            "threshold" respectively)
        removeOutliers (bool): Forwarded to every algorithm except DBSCAN
            (default is True)

    Returns:
        Whatever the selected ``cluster`` function returns, or None when
        ``algorithm`` matches none of the supported names.
    """
    corpus = tagAndVectorizeCorpus(docs)

    # Each entry becomes its own one-key dict; only the first one is kept.
    singletons = [{entry["param"]: entry["value"]} for entry in parameters]
    params = singletons[0]

    if algorithm == "DBSCAN":
        outcome = cluster.dbscan(corpus, float(params["epsilon"]))
    elif algorithm == "K Means":
        outcome = cluster.kmeans(corpus, int(params["clusters"]), removeOutliers)
    elif algorithm == "Gaussian K Means":
        # The strictness passed on is capped at 4.
        strictness = min(4, int(params["strictness"]))
        outcome = cluster.g_kmeans(corpus, strictness, removeOutliers)
    elif algorithm == "Agglomerative Clustering":
        outcome = cluster.agglomerate(corpus, float(params["threshold"]), removeOutliers)
    else:
        outcome = None
    return outcome
コード例 #6
0
def run_original(data_x, data_y, name):
    """Run the neural network, correlation plot and clustering on raw data.

    While the work is running, ``sys.stdout`` is redirected to
    ``reports/<name>_nn_output.txt`` so everything printed by the helpers
    ends up in that report.

    Args:
        data_x: Feature data.
        data_y: Target data.
        name: Label used in messages, chart titles and the report file name.
    """
    print("Running {} original charts and clustering".format(name))

    # Run Original
    report_name = "reports/{}_nn_output.txt".format(name)
    # ``with`` closes the report file; ``finally`` restores stdout even when
    # one of the steps raises, so later output is not silently swallowed.
    with open(report_name, "w") as report:
        sys.stdout = report
        try:
            x_train, x_test, y_train, y_test = split_data(data_x, data_y)
            run_nn(name, x_train, x_test, y_train, y_test)

            # The correlation plot uses the training split only.
            plot_corr("{} Original".format(name), x_train, y_train)

            # Cluster Original (#1) -- on the full, unsplit data
            kmeans("{} KMeans".format(name), data_x, data_y)
            gmm("{} GMM".format(name), data_x, data_y)
        finally:
            sys.stdout = sys.__stdout__

    print("Finished {} original charts and clustering".format(name))
コード例 #7
0
def main():
    # Python 2 entry point (print statements, `commands` module).
    # Writes a map file and a script file from template files named in the
    # parsed options, gathers `points` for the listed users, clusters them
    # with kmeans() and writes one marker per non-empty cluster to the
    # script file.
    # NOTE(review): mfile, sfile, users and the template files opened inline
    # are never closed in the code visible here.
    (options, args) = parser.parse_args()
    mfile = open(options.m, 'w')  #map file
    # The first template contains a %-placeholder that receives the title.
    print >> mfile, open(options.input1).read() % ("Twitter Users")
    print >> mfile, open(options.input2).read()
    sfile = open(options.script, 'w')  #script file
    initialize = open(options.initialize, 'r').read()
    # (38, -27) fills two placeholders of the initialize template --
    # presumably the initial map centre as (lat, lon); TODO confirm.
    print >> sfile, initialize % (38, -27)
    if options.f:
        # options.f is written as the only entry of the listing file.
        ufile = open(options.u, 'w')
        print >> ufile, options.f
        ufile.close()
    else:
        # Otherwise the listing file is produced by `ls` on options.d.
        # NOTE(review): both values are interpolated into a shell command
        # unquoted, and statout is never inspected.
        statout = commands.getstatusoutput("ls %s > %s" %
                                           (options.d, options.u))
    users = open(options.u, 'r')  #user listing
    points = []
    num = 1
    for line in users:  #for each user in listing
        # line[0:-1] drops the last character of the listing line
        # (presumably the trailing newline).
        user = options.d + line[0:-1]  #absolute path to user data
        # NOTE(review): the next line is corrupted in this copy of the file
        # (run of asterisks).  The code that assigns lat/lon, fills `points`
        # and opens the `try:` matching the `except:` below is missing here.
        print "user: "******"  lat: %s; lon: %s" % (lat,lon)
#            if lat and lon and num<37:
#                marker = open(options.marker).read()
#                print >>sfile, marker % (num,lat,lon,num,num,username)
#                num += 1
        # NOTE(review): bare except -- every error is reported as an
        # unpacking problem.
        except:
            print "  error unpacking location"


#    print "points: %s" % points
    # NOTE(review): the meaning of 50 and 100 depends on kmeans(), which is
    # defined elsewhere (possibly cluster count and iteration limit).
    markers = kmeans(points, 50, 100)  #clusters of users
    print "markers: %s" % markers
    i = 0  #counter
    for m in markers:
        num = m[0]  #number of members in cluster
        lat = m[1][0]  #latitude of cluster mean
        lon = m[1][1]  #longitude of cluster mean
        if num > 0:  #cluster contains 1 or more users
            # The marker template is re-read for every non-empty cluster;
            # i numbers only the clusters that were actually written.
            marker = open(options.marker).read()
            print >> sfile, marker % (i, lat, lon, i, i, num)
            i += 1
    # Finish the script file with the closing template.
    print >> sfile, open(options.close, 'r').read()
コード例 #8
0
def saveKmeans(filename, target_dir, k, scvis=False):
    """Cluster ``filename`` with k-means and store the label mask as text.

    The second element returned by ``cluster.kmeans`` is written one entry
    per line to ``<target_dir>color_mask_<k>.txt``, or to
    ``<target_dir>color_mask_<k>_scvis.txt`` when ``scvis`` is truthy.
    ``target_dir`` is joined by plain concatenation, so it has to end with a
    path separator.
    """
    createDir(target_dir)
    _, k_mask = cluster.kmeans(filename, clusters=k, scvis=scvis)

    # Only the file name differs between the two modes.
    suffix = "_scvis.txt" if scvis else ".txt"
    mask_path = target_dir + "color_mask_" + str(k) + suffix

    with open(mask_path, "w+") as out:
        out.writelines(str(label) + "\n" for label in k_mask)
コード例 #9
0
def main():
    # Python 2 entry point (print statements, `commands` module).
    # Writes a map file and a script file from template files named in the
    # parsed options, reads one location per listed user, clusters the
    # locations with kmeans() and writes one marker per non-empty cluster to
    # the script file.
    # NOTE(review): mfile, sfile, users, the per-user data files and the
    # template files opened inline are never closed in the code visible here.
    (options, args) = parser.parse_args()
    mfile = open(options.m, "w")  # map file
    # The first template contains a %-placeholder that receives the title.
    print >> mfile, open(options.input1).read() % ("Twitter Users")
    print >> mfile, open(options.input2).read()
    sfile = open(options.script, "w")  # script file
    initialize = open(options.initialize, "r").read()
    # (38, -27) fills two placeholders of the initialize template --
    # presumably the initial map centre as (lat, lon); TODO confirm.
    print >> sfile, initialize % (38, -27)
    if options.f:
        # options.f is written as the only entry of the listing file.
        ufile = open(options.u, "w")
        print >> ufile, options.f
        ufile.close()
    else:
        # Otherwise the listing file is produced by `ls` on options.d.
        # NOTE(review): both values are interpolated into a shell command
        # unquoted, and statout is never inspected.
        statout = commands.getstatusoutput("ls %s > %s" % (options.d, options.u))
    users = open(options.u, "r")  # user listing
    points = []
    num = 1
    for line in users:  # for each user in listing
        # line[0:-1] drops the last character of the listing line
        # (presumably the trailing newline).
        user = options.d + line[0:-1]  # absolute path to user data
        # NOTE(review): the next line is corrupted in this copy of the file
        # (run of asterisks); the start of the expression that assigns
        # `username` is missing.
        print "user: "******"/") + 1 : user.rfind(".")]  # isolate username
        print username
        if options.v:  # verbose option
            print username
        data = open(user, "r")  # user data file
        # Only the first line of the user's file is used.
        line1 = data.readline()
        print line1
        try:
            # The line must split into exactly four "$xyzzy$"-separated
            # fields, otherwise the unpacking raises and the user is skipped.
            add, lat, lon, ts = line1.split("$xyzzy$")
            # Drops the last character of lon.
            lon = lon[:-1]
            if lat and lon:
                # NOTE(review): lat and lon are still strings at this point.
                points.append([username, lat, lon])
        #            print "  lat: %s; lon: %s" % (lat,lon)
        #            if lat and lon and num<37:
        #                marker = open(options.marker).read()
        #                print >>sfile, marker % (num,lat,lon,num,num,username)
        #                num += 1
        # NOTE(review): bare except -- every error is reported as an
        # unpacking problem.
        except:
            print "  error unpacking location"
    #    print "points: %s" % points
    # NOTE(review): the meaning of 50 and 100 depends on kmeans(), which is
    # defined elsewhere (possibly cluster count and iteration limit).
    markers = kmeans(points, 50, 100)  # clusters of users
    print "markers: %s" % markers
    i = 0  # counter
    for m in markers:
        num = m[0]  # number of members in cluster
        lat = m[1][0]  # latitude of cluster mean
        lon = m[1][1]  # longitude of cluster mean
        if num > 0:  # cluster contains 1 or more users
            # The marker template is re-read for every non-empty cluster;
            # i numbers only the clusters that were actually written.
            marker = open(options.marker).read()
            print >> sfile, marker % (i, lat, lon, i, i, num)
            i += 1
    # Finish the script file with the closing template.
    print >> sfile, open(options.close, "r").read()
コード例 #10
0
 def generate_clusteredpoints(filepath, methodType, dataIndex, recordList):
     """Load the points in ``filepath`` and cluster them with ``methodType``.

     ``methodType`` selects the algorithm:

     * ``'kmeans'`` -- k is looked up in ``__kmeansk__[dataIndex]`` and the
       clustering is run through ``kmeansClusterWithRecord(recordList)``.
     * ``'dbscan'`` -- the two parameters are looked up in
       ``epsSelect[dataIndex]`` and ``MinPtsSelect[dataIndex]``;
       ``recordList`` is not used.

     Returns the ``points`` attribute of the clustering object, or None for
     any other ``methodType``.
     """
     plist = processtool.generate_plist(
         processtool.read_points_fromfile(filepath))

     if methodType == 'kmeans':
         clusterer = kmeans(__kmeansk__[dataIndex], plist)
         clusterer.kmeansClusterWithRecord(recordList)
         return clusterer.points

     if methodType == 'dbscan':
         clusterer = dbscan(epsSelect[dataIndex], MinPtsSelect[dataIndex], plist)
         clusterer.dbscanCluster()
         return clusterer.points

     # Unknown method: nothing is clustered.
     return None
コード例 #11
0
ファイル: system.py プロジェクト: justheuristic/GraveRobot
 def __init__(self,keyParams,boolParams, numParams, samples, samplesCount, nodesCount, majorant = False):
     '''Set up the LICS itself.

     Stores the three parameter groups, wraps every parameter of each group
     in a take-value statement, keeps the samples as a set, builds the tree
     from the samples and the key statements, and creates the k-means
     clusteriser.
     '''
     # Original author's note: "reverse keyparams pls!"

     def wrap_all(params):
         # One op_takeValue statement per parameter, in the given order.
         return [statements.get_statement(statements.op_takeValue, item)
                 for item in params]

     self.boolParams = boolParams
     self.numParams = numParams
     self.keyParams = keyParams

     self.keyStatements = wrap_all(keyParams)
     self.boolStatements = wrap_all(boolParams)
     self.numStatements = wrap_all(numParams)

     self.samples = set(samples)
     self.samplesCount = samplesCount
     self.nodesCount = nodesCount

     self.tree = treeNode.treeNode(self.samples, self.keyStatements, majorant)

     # The clusteriser receives this (partly initialised) object itself, so
     # the assignments above must stay ahead of this call; isMajorant is
     # deliberately still set afterwards, as before.
     otherParams = boolParams + keyParams
     self.clusteriser = cluster.kmeans(self, numParams, otherParams, samplesCount)
     self.isMajorant = majorant
コード例 #12
0
def approximate_corners(IMAGE_IN, IMAGE_OUT=None, PTS_OUT=None, HOM_OUT=None):
    """
    Approximates the locations of corners on each of two calibration boards in ChESS
    response image with filename *IMAGE_IN*. If requested, an image is written to
    *IMAGE_OUT* for visual inspection, a matrix of (x, y) image coordinates in the
    shape of each board is np.save()'d to *PTS_OUT*0 and *PTS_OUT*1, respectively,
    and a homography from [x, y, 1]^T (with x in range(width) and y in range(height))
    to each board's image coordinates is np.savetxt()'d to *HOM_OUT*[0,1].npy, again
    respectively.

    The homography file is produced by the ``_reshape`` helper, which only
    receives the target name (*HOM_OUT* plus the board index) from here.
    """
    # read in image and disregard pixels of very slight intensity
    image_in = np.asarray(cv2.imread(IMAGE_IN))
    nrows = len(image_in)
    ncols = len(image_in[0])
    # ``np.int`` was only an alias of the builtin ``int`` and no longer
    # exists in NumPy >= 1.24; ``int`` yields the very same dtype.
    image_out = np.zeros((nrows, ncols, 3), dtype=int)

    # [x, y, intensity] for every pixel whose first channel exceeds 15
    points = [ [x, y, image_in[y][x][0]]
               for y in range(nrows)
               for x in range(ncols)
               if image_in[y][x][0] > 15 ]

    # 2-means cluster to find the boards, seeded at the leftmost and
    # rightmost x coordinate of the retained pixels
    x_min = min([ point[0] for point in points ])
    x_max = max([ point[0] for point in points ])

    # NOTE(review): the meaning of the trailing 5 is defined by cl.kmeans.
    boards = cl.kmeans(points, [[x_min, 0], [x_max, 0]], 5)

    # process each board separately
    for b, board in enumerate(boards):
        # do some filtering to (hopefully!) get one point per corner
        board = _reduce(board)

        # guess homography and point order
        if HOM_OUT is not None:
            THIS_HOM_OUT = HOM_OUT + str(b)
        else:
            THIS_HOM_OUT = None
        board = _reshape(board, THIS_HOM_OUT)

        # output
        if PTS_OUT is not None:
            np.save(PTS_OUT + str(b), board)
        if IMAGE_OUT is not None:
            _imwrite(image_out, board, b)

    # output
    if IMAGE_OUT is not None:
        cv2.imwrite(IMAGE_OUT, image_out)
コード例 #13
0
def saveGeneTable(filename, file_after_reduction, target_dir, k, scvis=False):
    """Write the per-cluster mean of every column of ``filename``.

    The rows are clustered via ``cluster.kmeans`` on
    ``file_after_reduction``; the resulting labels group the rows of the
    table loaded from ``filename``.  The ``k`` mean rows are transposed and
    saved tab-separated to ``<target_dir>geneTable_<k>.txt``, or
    ``<target_dir>geneTable_<k>_scvis.txt`` when ``scvis`` is truthy.
    ``target_dir`` is joined by plain concatenation, so it has to end with a
    path separator.
    """
    createDir(target_dir)
    _, k_mask = cluster.kmeans(file_after_reduction, clusters=k, scvis=scvis)
    data = loadTSV(filename)

    # members[c] collects the row numbers that were assigned to cluster c.
    members = [[] for _ in range(k)]
    for row, label in enumerate(k_mask):
        members[label].append(row)

    result = np.zeros(shape=(k, len(data[0])))
    for label, rows in enumerate(members):
        result[label] = np.mean(data[rows], axis=0)

    # Only the file name differs between the two modes.
    suffix = "_scvis.txt" if scvis else ".txt"
    table_path = target_dir + "geneTable_" + str(k) + suffix
    np.savetxt(table_path, result.transpose(), delimiter="\t")
コード例 #14
0
ファイル: specific_tests.py プロジェクト: iHeartGraph/Graggle
def wl_search(fname, ignore_list=None, top_n=None, thresh=5, w2v=None):
    """Grid-search ``nw`` and the word2vec window; record mean k-means purity.

    For every ``nw`` in ``100, 200, ..., 800`` and every window in ``2..9``
    the vectors are rebuilt with ``get_vectors`` and clustered with
    ``kmeans`` several times; the mean of the ``['purity']['Total']``
    results is printed and appended to ``wl_search.csv``.  Output of the
    helpers is silenced by temporarily replacing ``sys.stdout`` with an
    ``EmptyStdout`` instance.

    Args:
        fname: Input passed to ``get_vectors``.
        ignore_list: Forwarded as ``ignore_list``; ``None`` means an empty
            list.
        top_n: Forwarded to ``get_vectors`` as ``top_n``.
        thresh: Forwarded to ``get_vectors`` as ``thresh``.
        w2v: Base word2vec parameters.  They are copied, so the caller's
            dict is left untouched; the copy's ``'window'`` entry is
            overwritten for each grid point.
    """
    # None sentinels instead of mutable defaults shared between calls, and a
    # private copy because 'window' is overwritten below (this used to leak
    # into the caller's dict and into the shared default).
    ignore_list = [] if ignore_list is None else ignore_list
    w2v_params = dict(w2v) if w2v else {}

    runs = 5

    og = sys.stdout
    be_quiet = EmptyStdout()

    # ``with`` closes the CSV even if a run raises half-way through.
    with open('wl_search.csv', 'w+') as f:
        cw = csv.writer(f)
        # NOTE(review): the 'Walk Len' column actually holds the word2vec
        # window; ``wl`` itself stays fixed at 10 below.
        cw.writerow(['Num Walks', 'Walk Len', 'Purity'])

        # range() starts at 100, so the former ``if nw == 0`` special case
        # could never trigger and has been dropped.
        for nw in range(100, 900, 100):
            for window in range(2, 10):
                w2v_params['window'] = window

                purities = []

                sys.stdout = be_quiet
                try:
                    for _ in range(runs):
                        X, y = get_vectors(fname,
                                           thresh=thresh,
                                           nw=nw,
                                           wl=10,
                                           w2v_params=w2v_params,
                                           top_n=top_n,
                                           ignore_list=ignore_list)

                        n = len(set(y)) + 1
                        purities.append(kmeans(X, y, n)['purity']['Total'])
                finally:
                    # Never leave stdout muted, not even on an exception.
                    sys.stdout = og

                purity = sum(purities) / len(purities)
                print('NW: %d, WL:%d, Purity: %0.4f' % (nw, window, purity))
                cw.writerow([nw, window, purity])
コード例 #15
0
ファイル: ai.py プロジェクト: lostact/SearchAndDefuse
    def decide(self):
        start = time.time()
        print('decide' + ' ' + str(self.current_cycle))
        if self.my_side == 'Police':
            my_agents = self.world.polices
            # return 0
        else:
            my_agents = self.world.terrorists
        # ignore dead agents
        alive_agents = []
        for agent in my_agents:
            if agent.status == EAgentStatus.Dead:
                if self.my_side == 'Terrorist':
                    if self.target_bombsites[agent.id]:
                        if self.target_bombsites[agent.id] in self.bombsites:
                            self.bombsites[self.target_bombsites[agent.id]]['task'] = 0
                            self.bombsites[self.target_bombsites[agent.id]]['status'] = -1
                        self.target_bombsites[agent.id] = None
            else:
                alive_agents.append(agent)
        # update bombsites if terrorists score is changed:
        exploded_bombsites = []
        if self.last_scores['Terrorist'] != self.world.scores['Terrorist']:
            for bombsite in self.bombsites:
                if self.world.board[bombsite[0]][bombsite[1]] == ECell.Empty:   
                    exploded_bombsites.append(bombsite)
            for bombsite in exploded_bombsites:
                del self.bombsites[bombsite]
            if exploded_bombsites:
                print(exploded_bombsites)
                updated,unreachables,bypassed_bombsites_list = False, [], []
                for i,bombsite in enumerate(self.unreachable_bombsites):
                    distance, path, bypassed_bombsites = self._a_star(self.start_pos,bombsite,agent_block=False,bombsite_block=False)
                    if not bypassed_bombsites:
                        self.bombsites[bombsite] = {'size':self.world.board[bombsite[0]][bombsite[1]],'initial_distance':distance, 'status':-2,'task':0,'bscore':0, 'failed':0, 'agent':None,'ert':-1}
                        updated = True
                        print('unreachable opened:',bombsite,self.bombsites[bombsite])
                    else:
                        unreachables.append(bombsite)
                        bypassed_bombsites_list.append((bombsite,bypassed_bombsites))
                self.unreachable_bombsites = unreachables
                if updated:
                    for bombsite in self.bombsites:
                        self.bombsites[bombsite]['bscore'] = 0
                    for bombsite, bypassed_bombsites in bypassed_bombsites_list:
                        bypassed_bombsite = bypassed_bombsites[-1]
                        unreachable_size = self.world.board[bombsite[0]][bombsite[1]]
                        self.bombsites[bypassed_bombsite]['bscore'] += (self.BOMBSITE_COEFFICIENT[unreachable_size] * self.unreachable_bscore_coefficient)
                if self.my_side == 'Police':
                    bombsite_positions = sorted(self.bombsites, key = lambda x: self.bombsites[x]['initial_distance'])
                    self.clusters, self.cluster_centers = kmeans(bombsite_positions, len(my_agents))
                    self.cluster_index = [0] * len(my_agents)
                    print(self.clusters,self.cluster_centers)
        # prevent passing by exploding bombs
        for bomb in self.world.bombs:
            if bomb.explosion_remaining_time == 1:
                for neighbor in  bomb.position.get_neighbours(self.world):
                    self.world.board[neighbor.y][neighbor.x] = ECell.Wall

        if self.my_side == 'Terrorist':
            # update bombsites if bomb(s) is defused
            if self.last_scores['Police'] != self.world.scores['Police']:
                bomb_count = len(self.world.bombs)
                for last_bomb in self.last_bombs:
                    for index,bomb in enumerate(self.world.bombs):
                        if bomb.position == last_bomb.position:
                            break
                        elif index == bomb_count - 1:
                            bombsite_position = self._position_to_tuple(last_bomb.position)
                            if bombsite_position in self.bombsites:
                                self.bombsites[bombsite_position]['status'] = -1
                                self.bombsites[bombsite_position]['failed'] += 2
            self.last_bombs = self.world.bombs


            # choose nearest agent for each bombsite if bombsites number is smaller than agents number
            not_planted_bombsites = 0
            for bombsite in self.bombsites:
                if self.bombsites[bombsite]['status'] != 2:
                    not_planted_bombsites += 1
            if not_planted_bombsites < len(alive_agents):
                for agent_id,agent in enumerate(my_agents):
                    if agent.planting_remaining_time == -1:
                        self.target_bombsites[agent_id] = None

                for bombsite in self.bombsites:
                    if self.bombsites[bombsite]['status'] != 2:
                        min_distance = 1000
                        for agent in alive_agents:
                            if not self.target_bombsites[agent.id]:
                                distance, _ = self._a_star(agent.position,bombsite)
                                if distance < min_distance:
                                    best_agent = agent.id
                                    min_distance = distance
                        self.bombsites[bombsite]['task'] = 1
                        self.target_bombsites[best_agent] = bombsite
            print('targets:',self.target_bombsites)
        else:# police
            # update bombsites status according to sounds and visions
            for bombsite in self.bombsites:
                if self.bombsites[bombsite]['status'] < 0:
                    self.bombsites[bombsite]['status'] += 1
            in_vision_bombs = []
            for bomb in self.world.bombs:
                bombsite = self._position_to_tuple(bomb.position)
                self.bombsites[bombsite]['brt'] = bomb.explosion_remaining_time
                in_vision_bombs.append(bombsite)
            for agent in alive_agents:
                if agent.defusion_remaining_time == -1:
                    for bombsite in self.bombsites:
                        if self.bombsites[bombsite]['status'] >= 0:
                            if self._heuristic(bombsite,agent.position) <= self.world.constants.police_vision_distance:
                                if not bombsite in in_vision_bombs:
                                    if self._heuristic(bombsite,agent.position) == 1:
                                        self.bombsites[bombsite]['status'] = -1 * self.world.constants.bomb_planting_time - 1
                                    else:
                                        self.bombsites[bombsite]['status'] = -2
                                    self.bombsites[bombsite]['task'] = 0
                                else:
                                    self.bombsites[bombsite]['status'] = 2
                sound_counts = {ESoundIntensity.Weak:0,ESoundIntensity.Normal:0,ESoundIntensity.Strong:0}
                in_range_sites = {ESoundIntensity.Weak:[],ESoundIntensity.Normal:[],ESoundIntensity.Strong:[]}
                for sound in agent.bomb_sounds:
                    sound_counts[sound] += 1
                for bombsite in self.bombsites:
                    if self._heuristic(bombsite,agent.position) <= self.world.constants.sound_ranges[ESoundIntensity.Weak]:
                        distance, _ = self._a_star(agent.position, bombsite, not_valid_ecells=[ECell.Wall],agent_block=False)
                        if distance:
                            minimum_distance = self.world.constants.police_vision_distance
                            for sound_kind in self.sound_kinds:
                                if distance <= self.world.constants.sound_ranges[sound_kind] and distance > minimum_distance:
                                    if sound_counts[sound_kind]:
                                        status = self.bombsites[bombsite]['status']
                                        in_range_sites[sound_kind].append((bombsite,status,distance))
                                    else:
                                        self.bombsites[bombsite]['status'] = -2
                                        if self.target_bombsites[agent.id] == bombsite:
                                            self.checked_bombsites[agent.id].append(bombsite)
                                    break
                                minimum_distance = self.world.constants.sound_ranges[sound_kind]
                for sound_kind in self.sound_kinds:
                    if sound_counts[sound_kind] > 0:
                        possibles,sures = [], []
                        for index,(bombsite,status,distance) in enumerate(in_range_sites[sound_kind]):
                            if status >= 2:
                                sures.append(bombsite)
                            elif status >= 0:
                                possibles.append(bombsite)
                        if len(sures) == sound_counts[sound_kind]:
                            for bombsite in possibles:
                                self.bombsites[bombsite]['status'] = -2
                        else:
                            status = int((len(sures) + len(possibles)) <= sound_counts[sound_kind]) + 1
                            for bombsite in possibles:
                                self.bombsites[bombsite]['status'] = status
                                # print(agent.id,'at',agent.position,bombsite,status,in_range_sites)
            for bombsite in self.bombsites:
                if self.bombsites[bombsite]['ert'] > -1:
                    self.bombsites[bombsite]['ert'] -= 1
                elif self.bombsites[bombsite]['status'] == 2:
                    self.bombsites[bombsite]['ert'] = self.world.constants.bomb_explosion_time

        self.last_scores = self.world.scores
        # print(self.bombsites)
        for agent in alive_agents:
            if self.my_side == 'Police':
                bombsite_direction = self._find_bombsite_direction(agent)
                doing_bomb_operation = (agent.defusion_remaining_time != - 1)
                if doing_bomb_operation: # defusing
                    bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction],(agent.position.y,agent.position.x))
                    self._agent_print(agent.id, 'Continuing bomb operation')
                    self.bombsites[bombsite_position]['status'] = 2
                    if agent.defusion_remaining_time == 1:
                        self.bombsites[bombsite_position]['status'] =  -1 * self.world.constants.bomb_planting_time - 1
                        self.bombsites[bombsite_position]['task'] = 0
                    continue
                if bombsite_direction:
                    try:
                        multiple = len(bombsite_direction)
                    except:
                        multiple = False
                    if multiple:
                        bombsite_direction = bombsite_direction[0]
                    bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction],(agent.position.y,agent.position.x))
                    if self.target_bombsites[agent.id] == bombsite_position or multiple:
                        for bomb in self.world.bombs:
                            if self._position_to_tuple(bomb.position) == bombsite_position:
                                if bomb.explosion_remaining_time < self.world.constants.bomb_defusion_time:
                                    has_time = False
                                else:
                                    has_time = True
                                break
                        if has_time:
                            self._agent_print(agent.id, 'Starting bomb operation')
                            self.defuse(agent.id, bombsite_direction)
                            self.bombsites[bombsite_position]['task'] = 2
                            continue
                        else:
                            # self.scape_bombsite(agent)
                            self._agent_print(agent.id, 'direction: Ignoring bomb due to lack of time :(')
                            self.bombsites[bombsite_position]['status'] = 3
                            self.bombsites[bombsite_position]['task'] = 1
                            self.bombsites[bombsite_position]['agent'] = -1
                if self.world.bombs and not bombsite_direction: # bomb in vision:
                    found = False
                    for bomb in self.world.bombs:
                        bombsite_position = self._position_to_tuple(bomb.position)
                        if self.bombsites[bombsite_position]['task'] < 2 and self.bombsites[bombsite_position]['status'] < 3 and self.target_bombsites[agent.id] == bombsite_position and self._heuristic(agent.position,bombsite_position) <= self.world.constants.police_vision_distance:
                            distance, path = self._a_star(agent.position, bomb.position)
                            time_needed = distance + self.world.constants.bomb_defusion_time
                            if time_needed < bomb.explosion_remaining_time:
                                found = True
                                self.bombsites[bombsite_position]['task'] = 1
                                self.bombsites[bombsite_position]['agent'] = agent.id
                                self.path_move(agent,path)
                                self._agent_print(agent.id, 'Going to defuse.')
                                break
                                # print('time status:',time_needed,bomb.explosion_remaining_time)
                            else:
                                self._agent_print(agent.id, 'Ignoring bomb due to lack of time :(')
                                self.bombsites[bombsite_position]['task'] = 1
                                self.bombsites[bombsite_position]['agent'] = -1
                                self.bombsites[bombsite_position]['status'] = 3

                    if found:
                        continue
                # patrol:
                cluster = self.clusters[agent.id]

                if cluster:
                    best_bombsite, (best_distance, best_path) = self.best_bombsite_patrol(agent)
                    if best_path:
                        if len(best_path) > 1:
                            self.path_move(agent, best_path)
                else:
                    # len(bombsites) < len(agents)
                    pass
            else: # terrorist
                if self.last_position[agent.id] == agent.position:
                    self.gir_count[agent.id] += 1
                else:
                    self.gir_count[agent.id] = 0
                self.last_position[agent.id] = agent.position
                bombsite_direction = self._find_bombsite_direction(agent)
                doing_bomb_operation = agent.planting_remaining_time != -1
                threatened = False
                if ESoundIntensity.Strong in agent.footstep_sounds:
                    self.heard_sound_count[agent.id] += 1
                else:
                    self.heard_sound_count[agent.id] = 0
                if doing_bomb_operation:
                    try:
                        multiple = len(bombsite_direction)
                    except:
                        multiple = False
                    if multiple:
                        bombsite_direction = bombsite_direction[0]
                    bombsite_position = self.target_bombsites[agent.id]
                    if self.heard_sound_count[agent.id] >= (self.world.constants.terrorist_vision_distance - self.world.constants.police_vision_distance):
                        threatened = True
                        # self.heard_sound_count[agent.id] = 0
                    if not threatened:
                        self.gir_count[agent.id] = 0
                        if agent.planting_remaining_time <= 1:
                            self._agent_print(agent.id, 'Finishing bomb operation')
                            self.bombsites[bombsite_position]['task'] = 0
                            self.bombsites[bombsite_position]['status'] = 2
                            self.target_bombsites[agent.id] = None
                        else:
                            self._agent_print(agent.id, 'Continuing bomb operation')
                            self.bombsites[bombsite_position]['task'] = 2
                    else:
                        self._agent_print(agent.id, 'I swear I heard police footsteps, time to look around or maybe scape')
                        if len(self._empty_directions(agent.position)) == 1:
                            self.scape_bombsite(agent)
                        else:
                            self.move(agent.id, agent.position.direction_to(Position(bombsite_position[1],bombsite_position[0])))
                        self.bombsites[bombsite_position]['task'] = 1
                    continue
                if bombsite_direction:
                    bombsite_position = self._sum_pos_tuples(self.DIR_TO_POS[bombsite_direction],(agent.position.y,agent.position.x))
                    threatening_polices = []
                    for police in self.world.polices:
                        distance = self._heuristic(police.position,agent.position)
                        if distance <= self.world.constants.terrorist_vision_distance and police.status == EAgentStatus.Alive:
                            threatening_polices.append(police)
                    if threatening_polices:
                        self.scape_polices(agent,threatening_polices)
                        threatened = True
                        self.bombsites[bombsite_position]['failed'] += 1
                        self._agent_print(agent.id, "I see police(s). I can always plant, Now I must Scape.")
                        continue
                    if not threatened and self.bombsites[bombsite]['task'] != 2:
                        if bombsite_position == self.target_bombsites[agent.id] and self.bombsites[bombsite_position]['failed'] <= 5:
                            self._agent_print(agent.id, "Starting bomb operation, I don't see any polices.")
                            self.plant(agent.id, bombsite_direction)
                            self.bombsites[bombsite_position]['task'] = 2
                            continue
                # go to best bombsite
                if self.gir_count[agent.id] >= 4:
                    self._agent_print(agent.id, "Doing random move because GIR KARDAM !!!")
                    self.move(agent.id, random.choice(self._empty_directions(agent.position)))
                    continue
                best_bombsite, best_distance, best_path = self.best_bombsite_plant(agent)
                if len(best_path) > 1: # there IS a possible bombsite
                    self.bombsites[best_bombsite]['task'] = 1
                    self.target_bombsites[agent.id] = best_bombsite
                    self.last_distance[agent.id] = best_distance
                    self._agent_print(agent.id, 'Going to bombsite.')
                    self.path_move(agent,best_path)
                else: # no bombsites left to plant
                    threatening_polices = []
                    for police in self.world.polices:
                        if self._heuristic(agent.position,police.position) <= self.world.constants.police_vision_distance + 1:
                            threatening_polices.append(police)
                    if threatening_polices:
                        self.scape_polices(agent,threatening_polices)
                        self._agent_print(agent.id, 'Nothing to do, Scaping police(s).')
                        continue
                    else:
                        bombsite_position = self._find_bombsite_direction(agent,possible_only=False)
                        if bombsite_position:
                            self.scape_bombsite(agent)
                            self._agent_print(agent.id, 'Nothing to do, Getting a safe distance with the last planted bomb.')
                            continue
                        else:
                            self._agent_print(agent.id, 'Nothing to do, waiting ZzZzZ...')



        # if self.my_side == 'Police':
        #     for agent_1 in patrol_moves:
        #         for agent_2 in patrol_moves:
        #             if agent_1 != agent_2 and self._position_to_tuple(agent_1.position) == patrol_moves[agent_2] and self._position_to_tuple(agent_2.position) == patrol_moves[agent_1]:
        #                 self.cluster_index[agent_1.id],self.cluster_index[agent_2.id] = self.cluster_index[agent_2.id],self.cluster_index[agent_1.id]
        #                 bombsite_position_1 = self.bombsite_positions[self.cluster_index[agent_1.id]]
        #                 bombsite_position_2 = self.bombsite_positions[self.cluster_index[agent_2.id]]
        #                 _, path_1 = self._a_star(agent_1.position, bombsite_position_1)
        #                 _, path_2 = self._a_star(agent_2.position, bombsite_position_1)
        #                 patrol_moves[agent_1] = path_1[1]
        #                 patrol_moves[agent_2] = path_2[1]
        #     for agent in patrol_moves:
        #         if patrol_moves[agent]:
        #             self.position_move(agent,patrol_moves[agent])
        # else:
        #     print(self.heard_sound_count)
        if self.my_side == 'Police':
            print(self.target_bombsites)
            # print(self.bombsites)
            # for bombsite in self.bombsites:
            #     print(bombsite,'status:',self.bombsites[bombsite]['status'],'task:',self.bombsites[bombsite]['task'],end=',')
            # print()
        end = time.time()
        print('time:',end - start)
コード例 #16
0
def kmeansTest():
    """Cluster the points stored in ``./Aggregation.txt`` and print the result.

    The raw points are converted with ``processtool.generate_plist`` and
    handed to ``kmeans`` together with the constant 7 (presumably the number
    of clusters -- confirm against the ``kmeans`` constructor).
    """
    raw_points = processtool.read_points_fromfile("./Aggregation.txt")
    point_list = processtool.generate_plist(raw_points)

    clusterer = kmeans(7, point_list)
    clusterer.kmeansCluster()
    clusterer.printResult()
コード例 #17
0
def trial(filepath, mingene, mincell, cnum=8):
    """Run one filtering/clustering trial for a (mingene, mincell) setting.

    The trial filters ``filepath`` through ``scanpy``, plots a t-SNE of the
    filtered data, clusters it with ``cluster.kmeans`` and writes the centroid
    gene matrix and the centroid cell indices. The same plot / cluster /
    centroid steps are then repeated on the latent space written by
    ``autoencoder.getLatentSpace``.

    Args:
        filepath: Input data path handed to ``scanpy``.
        mingene: Passed to ``scanpy`` as ``mingenes``; also part of the
            output directory names.
        mincell: Passed to ``scanpy`` as ``mincells``; also part of the
            output directory names.
        cnum: Third argument of ``cluster.kmeans`` (presumably the number of
            clusters -- confirm against ``cluster.kmeans``).
    """
    # make dir
    suffix = "mincell=" + str(mincell) + "_mingene=" + str(mingene) + "/"
    data_dir = "./data/" + suffix
    graph_dir = "./process_images/" + suffix
    model_dir = './model/' + suffix
    for directory in (data_dir, graph_dir, model_dir):
        # exist_ok avoids the race between an os.path.exists() check and the
        # directory being created by another process before makedirs() runs.
        os.makedirs(directory, exist_ok=True)

    # filter data
    filtered = data_dir + "filtered.txt"
    sc = scanpy(filepath, mingenes=mingene, mincells=mincell)
    sc.getScanpy(filtered)

    # train autoencoder
    # autoencoder.train(filtered, model_dir, learning_rate=0.1, batch_size=100, epoch=800)

    # generate tsne
    plot.getTsne(filtered, graph_dir + "tsne.png")

    # generate kmeans
    k, _ = cluster.kmeans(filtered, graph_dir + "tsne with kmeans.png", cnum)

    # generate centroid values
    c = cluster.getCentroids(
        k,
        cluster.tsne(filtered),
    )
    cluster.getGeneofCentroids(filtered,
                               graph_dir + "centroid gene matrix.txt", c)

    # get cell index of centroids
    cindex = [sc.getFilteredCellList()[i] for i in c]
    with open(graph_dir + "centroidCellIndex.txt", "w") as out:
        out.write(str(cindex))

    # get latent space
    autoencoder.getLatentSpace(filtered, data_dir, model_dir)
    latent = data_dir + "latentSpace.txt"

    # generate graphs for latent space
    # generate tsne
    plot.getTsne(latent, graph_dir + "tsne_latent.png")

    # generate kmeans
    cluster.kmeans(latent, graph_dir + "tsne with kmeans_latent.png", cnum)

    # generate centroid values
    # NOTE(review): `k` is still the result of the k-means run on the
    # *filtered* data; the latent-space run above discards its return value.
    # Confirm whether the latent centroids should use the latent clustering.
    c = cluster.getCentroids(
        k,
        cluster.tsne(latent),
    )
    cluster.getGeneofCentroids(latent,
                               graph_dir + "centroid gene matrix_latent.txt",
                               c)

    # get cell index of centroids
    cindex = [sc.getFilteredCellList()[i] for i in c]
    with open(graph_dir + "centroidCellIndex_latent.txt", "w") as out:
        out.write(str(cindex))
コード例 #18
0
                           (labels_names, super_class_labels_names),
                           seed=4,
                           perplexity=30,
                           alpha=0.3,
                           fpath=(fname, fname_super))
        # anlz.tsne_plot(emails_representation, labels, args.seed)

        print('clustering...')
        ##clustering
        if (param.clustering == clust_kneams):
            if (param.affine == 'cosine'):
                # k-means cosine dist
                clusters, kmns_class = clst.kmeans_cosine_dist(
                    emails_representation, param.k)
            elif (param.affine == 'euclidean'):  # k-means euclidean
                clusters, kmns_class = clst.kmeans(emails_representation,
                                                   param.k)
                print('kmeans sum of distanses = %f' % kmns_class.inertia_)
            else:
                raise ValueError('kmenas is not supported with affinity: ' +
                                 param.affine)
        elif (param.clustering == clust_hirarchical):
            # hirarchical
            (clusters, _) = clst.hirarchical(emails_representation,
                                             param.k,
                                             aff=param.affine,
                                             link=param.linkage)
        else:
            raise ValueError('clustering is not supported with: ' +
                             param.clustering)
        # analyze
        print('analyzing...')
コード例 #19
0
from cluster import kmeans

# Run kmeans once per dataset, each time on one pair of columns with k = 2.
for dataset, xs, ys, k in (
    ("datasets/iris.csv", "SepalLengthCm", "PetalLengthCm", 2),
    ("datasets/adult.csv", "age", "capitalgain", 2),
):
    kmeans(dataset, xs, ys, k)
コード例 #20
0
ファイル: main.py プロジェクト: 72Zn/pyling
def main():
    '''
    prepare input files for automatic text analysis
    '''
    print "PARSING"
    print "extracting contents and titles from file(s)."
    contents,titles = parse.parseDocs(file_documents)
    print ""
    '''
    automatic text analysis
    '''
    print "AUTOMATIC TEXT ANALYSIS"
    documents,terms, docvecs, nulvecs = textanalysis.main(contents,titles,path+file_stopwords, progress)
    print ""
    '''
    clustering
    '''
    print "CLUSTER ANALYSIS"
    print "\nleader clustering"
    print "*: vector forms a new centroid, .: the vector is assigned to existing centroid"
    cleader = cluster.leader(docvecs,leader_threshold,dist,comp,progress)
    print "!"
    print "found " + str(len(cleader)) + " clusters"

    print "\nkmeans clustering"
    print "*: cluster is altered, .: cluster not changed, |: next iteration"
    k = int(round(kmeans_factor*len(cleader),0))
    ckmeans = cluster.kmeans(docvecs, k, dist, comp, True)
    print "!"
    print "found " + str(len(ckmeans)) + " clusters"

#    print "\nagglomorative clustering",
#    t = cluster.threshold(docvecs, agglomorate_factor, dist)
#    clink = cluster.agglomorate(docvecs, linkage, t, dist, comp, True)
#    print "!"
#    print "found " + str(len(clink)) + " clusters"

    print "\naggregate the clustering"
    print "a: consider next A cluster, b: consider next B cluster, !: aggregate"
    t = cluster.threshold(docvecs, aggregate_factor, dist)
    clustering = cluster.aggregate2(cleader,ckmeans,t,dist,comp,True) 
    print "!"
    print "found " + str(len(clustering)) + " clusters"

    print "\nhandle overlap between clusters",
    clustering = cluster.prune(docvecs,clustering,dist,comp)
    print "!"
    print "found " + str(len(clustering)) + " clusters"
    
    print ""
    '''
    topic extraction
    '''
    print "TOPIC EXTRACTION"
    topics = topicextract.FindTopics(terms, clustering, documents, topic_length, progress)
    print ""
    print "The following topics have been found.\n"
    print "One topic per cluster. A Topic is a list of Terms with their cluster weights.\n"
    for t in range(len(topics)): print "Topic["+str(t)+"]:", topics[t], "\n"
    print "\nDONE!"

    for i in range(len(clustering)):
        clustering[i].settopic(topics[i])
    
    # output to files
    out_file = open(outpath+'documents.txt','w')
    for i in documents:
        print>>out_file, i
    out_file.close()
    
    out_file = open(outpath+'terms.txt','w')
    for i in terms:
        print>>out_file, i
    out_file.close()
    
    out_file = open(outpath+'clusters.txt','w')
    for i in range(len(clustering)):
        c = clustering[i]
        for j in c:
            print>>out_file, str(i) + ":" + str(j.ref)
    out_file.close()
    
    out_file = open(outpath+'topics.txt','w')
    for i in topics:
        print>>out_file, i
    out_file.close()
    
    return documents, terms, docvecs, nulvecs, clustering, topics
コード例 #21
0
ファイル: web.py プロジェクト: LesMartins/Flickr-Data-Mining
def othermap():
    """Render ``map.html`` with a map built from the ``cluster.kmeans()`` output."""
    kmeans_result = cluster.kmeans()
    generated_map = gmaps.genMap(*kmeans_result)
    return render_template("map.html", the_map=generated_map)
コード例 #22
0
import sys
import numpy as np
from cluster import kmeans

# Exactly four command-line arguments are required; otherwise print the
# usage text (framed by blank lines) and exit with status 1.
if len(sys.argv) != 5:
    usage = (
        "Usage: %s [Seed Value] [Number of Centroids] [training Data] [Test Data] "
        % (sys.argv[0]))
    print()
    print(usage)
    print()
    sys.exit(1)

train = np.loadtxt(sys.argv[3])
test = np.loadtxt(sys.argv[4])
model = kmeans(train,
               int(sys.argv[2]),
               label=True,
               seed_value=int(sys.argv[1]))
model.createCluster()

# A test file with fewer than two dimensions is treated as one single sample;
# otherwise every row is a sample. The last element of a sample is compared
# with the model's decision.
correct = 0
samples = [test] if test.ndim < 2 else test
for data in samples:
    classification = model.makeDecision(data, label=True)
    print(classification, data[-1])
    if data[-1] == classification:
        correct += 1
コード例 #23
0
                    type=str,
                    help="log directory for TensorBoard")

params = parser.parse_args()

# Script entry point: dispatch on params.mode. Every handler receives the
# whole parsed `params` namespace.
if __name__ == "__main__":

    if params.mode == "embed":
        embed(params)

    if params.mode == "sif":
        sif(params)

    # "cluster" mode: each enabled flag runs its own step; the checks are
    # independent `if`s, so several steps can run in one invocation.
    if params.mode == "cluster":
        if params.kmeans:
            kmeans(params)
        if params.opt_k:
            opt_k(params)
        if params.hierarch_k:
            hierarch_k(params)

    # "project" mode: same pattern for the projection steps.
    if params.mode == "project":
        if params.pca:
            pca(params)
        if params.tsne:
            tsne(params)

    if params.mode == "metadata":
        # Writing cluster labels to the metadata is optional: when
        # params.meta_labels is falsy, the labels file setting is cleared.
        if not params.meta_labels:
            params.meta_labels_file = None
コード例 #24
0
    if len(sys.argv) != 5:
        print "Usage: compressImage.py <numero_de_clusters> <threshold> <num_iteraciones> <nombre_imagen_con_extension> "
        exit(1)

    k = int(sys.argv[1])
    threshold = float(sys.argv[2])
    max_iter = int(sys.argv[3])
    file_name = sys.argv[4]
    number_attributes = 3

    # Load image
    image = misc.imread(file_name)

    # Transform pixel matrix into a one dimensional vector
    x = image.reshape((image.shape[0] * image.shape[1], 3))

    clusters = createClusters(k, number_attributes, 0, 255)
    kmeans(clusters, x, threshold, max_iter)

    # Change each pixel color by that of its centroid
    compress(clusters, x)

    # Restore pixel matrix
    x = x.reshape(image.shape)

    im = Image.fromarray(x)
    out_file = os.path.basename(file_name)[:-4]
    out_file += "_" + str(k) + ".png"
    im.show()
    im.save(out_file)
コード例 #25
0
ファイル: web.py プロジェクト: LesMartins/Flickr-Data-Mining
def othermap():
    """Render ``map.html`` with a map built from the ``cluster.kmeans()`` output."""
    kmeans_result = cluster.kmeans()
    generated_map = gmaps.genMap(*kmeans_result)
    return render_template("map.html", the_map=generated_map)
コード例 #26
0
	for x in k.getOutput():
		wrtr.writerow(x)
	eWrtr = csv.writer(open("./output/"+fn+"_error.csv","wb"))
	for x in k.getErrors():
		eWrtr.writerow(x)

# make sure data is in random order
random.shuffle(res)
# 20 percent set aside for cross-validation
xval = int(0.20*len(res))

for c in range(1,maxClusters+1):
	minError = sys.maxint
	with timer.Timer():
		for i in range(0,iters):
			k1 = cluster.kmeans(res, c, xval)
			err, xerr = k1.run()
			if xerr < minError:
				minError = xerr
				writeFile("k-%d-%f1.4"%(c,err), k1)
			print 'k-means,',i,',',c,',',minError,',Inter'
			sys.stderr.write("kmeans clusters: %d iter: %d \n"%(c,i))
		print 'k-means,',c,',',minError,',',

minl = math.log(0.9)
maxl = math.log(math.sqrt(2.0) * dataSpread)
dl = (maxl-minl)/float(maxClusters)
for l in [math.exp(minl + i*dl) for i in range(0, 2*maxClusters)]:
	minError = sys.maxint
	with timer.Timer():
		for i in range(0,iters):
コード例 #27
0
ファイル: ex.py プロジェクト: woodinsouth/data_mining-ex
['systems','junior','26...30','46k...50k',3],
['systems','senior','41...45','66k...70k',3],
['marketing','senior','36...40','46k...50k',10],
['marketing','junior','31...35','41k...45k',4],
['secretary','senior','46...50','36k...40k',4],
['secretary','junior','26...30','26k...30k',6]
]

test_data = ['systems','26...30','46k...50k']

header = ['department', 'status', 'age','salary','count']

# Script entry point: runs apriori, the two classifiers and k-means in turn.
if __name__ == "__main__":

	# Command-line options: --mins (float, default 0.6) and --k (int, default 3).
	optparser = OptionParser()
	optparser.add_option('--mins', dest='min_support', help='minium support value', default=0.6, type='float')
	optparser.add_option('--k', dest='k', help='kmeans k center point', default=3, type=int)

	(options, args) = optparser.parse_args()
	# find the frequent itemsets of the textbook dataset
	min_support = options.min_support
	apriori("apriori_d.txt", min_support)

	# classify the test data with classifiers trained on the textbook training data, which is written directly in this module
	DesisionTree(train_data)
	NaiveBayes(train_data, test_data)

	# cluster the textbook dataset
	k = options.k
	textbook = kmeans("kmeans_t.txt", k)
コード例 #28
0
ファイル: ai.py プロジェクト: lostact/SearchAndDefuse
    def initialize(self):
        """Build the AI's lookup tables and per-agent state, then print timing.

        Steps, in order:

        * define the direction, bombsite-type and score-coefficient tables;
        * scan ``self.world.board`` for cells that are neither ``ECell.Wall``
          nor ``ECell.Empty`` and store each one in ``self.bombsites`` when
          ``_a_star`` finds a path from the first agent's position, or in
          ``self.unreachable_bombsites`` otherwise;
        * create side-specific state: on the police side the bombsites are
          split into one k-means cluster per agent and each agent's first
          target is the first entry of its cluster; on the terrorist side
          per-agent counters and the last known positions are created;
        * for every unreachable bombsite, add a bonus to the ``bscore`` of the
          last entry in the bypassed-bombsite list returned by ``_a_star``.

        Per-agent lists are indexed by ``agent.id`` (assumes ids run from 0 to
        ``len(my_agents) - 1`` -- confirm against the game API).
        """
        start = time.time()
        os.system("printf '\033c'")
        print('initialize')

        # AI constants:
        self.DIRECTIONS = [
            ECommandDirection.Up,
            ECommandDirection.Right,
            ECommandDirection.Down,
            ECommandDirection.Left,
        ]

        # (row, column) offset of a single step in each direction
        self.DIR_TO_POS = {
            ECommandDirection.Up:    (-1, +0),
            ECommandDirection.Right: (+0, +1),
            ECommandDirection.Down:  (+1, +0),
            ECommandDirection.Left:  (+0, -1),
        }

        self.BOMBSITES_ECELL = [
            ECell.SmallBombSite,
            ECell.MediumBombSite,
            ECell.LargeBombSite,
            ECell.VastBombSite,
        ]

        self.BOMBSITE_COEFFICIENT = {
            ECell.SmallBombSite: self.world.constants.score_coefficient_small_bomb_site,
            ECell.MediumBombSite: self.world.constants.score_coefficient_medium_bomb_site,
            ECell.LargeBombSite: self.world.constants.score_coefficient_large_bomb_site,
            ECell.VastBombSite: self.world.constants.score_coefficient_vast_bomb_site,
        }

        self.sound_kinds = [ESoundIntensity.Strong,ESoundIntensity.Normal,ESoundIntensity.Weak]

        # agent constants:
        self.unreachable_bscore_coefficient = 2
        self.bombsite_size_coefficient = -1
        # self.unreachable_distance_power = 0.125
        self.failed_bombsite_coefficient = -10

        # The side is tested once and reused below.
        is_police = (self.my_side == 'Police')
        my_agents = self.world.polices if is_police else self.world.terrorists

        # bombsite dictionaries:
        self.bombsites = {}
        self.unreachable_bombsites = []
        self.start_pos = my_agents[0].position
        for row,line in enumerate(self.world.board):
            for col,cell in enumerate(line):
                if cell not in [ECell.Wall,ECell.Empty]:
                    distance, path = self._a_star(self.start_pos,(row,col),agent_block=False)
                    if path:
                        # KEYS:
                        # status key: 0<: clear for sure, 0: unknown, 1: bomb is possible, 2: bomb for sure 3: no time to defuse
                        # task key: 0 = nothing, 1 = going to act, 2 = acting
                        # act means defuse or plant
                        self.bombsites[(row,col)] = {'size':cell,'initial_distance':distance, 'status':0,'task': 0,'bscore': 0, 'failed':0,'agent':None,'ert':-1}
                    else:
                        self.unreachable_bombsites.append((row,col))

        # ai variables:
        agent_count = len(my_agents)
        if is_police:
            # bombsites ordered by their distance from the start position
            bombsite_positions = sorted(self.bombsites, key = lambda x: self.bombsites[x]['initial_distance'])
            self.clusters, self.cluster_centers = kmeans(bombsite_positions,agent_count)
            print(self.clusters, self.cluster_centers)
            self.cluster_index = [0] * agent_count
            self.checked_bombsites = [[] for _ in range(agent_count)]
            self.target_bombsites = [None] * agent_count
            for i,cluster in enumerate(self.clusters):
                self.target_bombsites[i] = cluster[0]
        else:
            self.last_bombs = set()
            self.heard_sound_count = [0] * agent_count
            self.target_bombsites = [None] * agent_count
            # One independent placeholder list per agent; `[[]] * n` would
            # put the very same list object in every slot.
            self.last_position = [[] for _ in range(agent_count)]
            self.last_distance = [0] * agent_count
            self.gir_count = [0] * agent_count
            for agent in my_agents:
                self.last_position[agent.id] = agent.position

        for unreachable_bombsite in self.unreachable_bombsites:
            # With bombsite_block=False, _a_star returns a third value: the
            # bombsites the path had to pass through.
            _, _, bypassed_bombsites = self._a_star(self.start_pos,unreachable_bombsite,agent_block=False,bombsite_block=False)
            bypassed_bombsite = bypassed_bombsites[-1]
            unreachable_size = self.world.board[unreachable_bombsite[0]][unreachable_bombsite[1]]
            self.bombsites[bypassed_bombsite]['bscore'] += (self.BOMBSITE_COEFFICIENT[unreachable_size] * self.unreachable_bscore_coefficient)
        self.last_scores = self.world.scores

        # timing:
        end = time.time()
        print('initialize time',end - start)

        #debug:
        print(self.unreachable_bombsites)
コード例 #29
0
# Project the scaled features with the PCA object and report the summed
# explained-variance ratio of its components.
X_pca = pca.transform(X_scaled)
print("Cumulative Explained Variance:", pca.explained_variance_ratio_.sum())

# perform LDA for 2 axes, measure performance
LDA = LinearDiscriminantAnalysis(n_components=2,
                                 shrinkage='auto',
                                 solver='eigen')
LDA_reduced_df = LDA.fit(X_scaled, Y).transform(X_scaled)

# score() is evaluated on the same data the model was fitted on
print(LDA.score(X_scaled, Y))

# find all silhouette scores from 5-20
find_best_cluster("kmeans", LDA_reduced_df, 5, 20)

# perform kmeans on chosen K
# NOTE(review): this rebinds the name `kmeans` from the clustering function to
# its result, so the function cannot be called again below this line.
kmeans = kmeans(LDA_reduced_df, 5)

# Assign labels
data['Cluster'] = kmeans['labels']

# Print silhouette score
print("silhouette score:", kmeans['silhouette_score'])

# Target labels
y = kmeans['labels']
# The two LDA components together with the cluster label of each row
df = pd.DataFrame({
    'X1': LDA_reduced_df[:, 0],
    'X2': LDA_reduced_df[:, 1],
    'labels': y
})
コード例 #30
0
        wrtr.writerow(x)
    eWrtr = csv.writer(open("./output/" + fn + "_error.csv", "wb"))
    for x in k.getErrors():
        eWrtr.writerow(x)


# make sure data is in random order
random.shuffle(res)
# 20 percent set aside for cross-validation
xval = int(0.20 * len(res))

for c in range(1, maxClusters + 1):
    minError = sys.maxint
    with timer.Timer():
        for i in range(0, iters):
            k1 = cluster.kmeans(res, c, xval)
            err, xerr = k1.run()
            if xerr < minError:
                minError = xerr
                writeFile("k-%d-%f1.4" % (c, err), k1)
            print 'k-means,', i, ',', c, ',', minError, ',Inter'
            sys.stderr.write("kmeans clusters: %d iter: %d \n" % (c, i))
        print 'k-means,', c, ',', minError, ',',

minl = math.log(0.9)
maxl = math.log(math.sqrt(2.0) * dataSpread)
dl = (maxl - minl) / float(maxClusters)
for l in [math.exp(minl + i * dl) for i in range(0, 2 * maxClusters)]:
    minError = sys.maxint
    with timer.Timer():
        for i in range(0, iters):