Example 1
def load_clusters():
    """Load info from topics.txt file into Cluster, TermCluster tables"""

    # Delete whatever's in the db already
    Cluster.query.delete()
    TermCluster.query.delete()

    count_clusters = 0

    for row in open("topics.csv"):

        row = row.rstrip().split(",")

        # Parse the row into the appropriate data types for seeding
        cluster = int(row[1][-3:])
        word = row[3].strip()

        # Check if word is in our list of key terms. If it is, add to
        # TermCluster table to allow for lookup later (see seed.py for TODO)

        if Term.check_for_term(word):
            term_cluster_to_add = TermCluster(word=word, cluster_id=cluster)
            db.session.add(term_cluster_to_add)
            db.session.commit()

        # Check if a cluster is in our list of clusters. If it's not, add it.
        if not Cluster.check_for_cluster(cluster):
            cluster_to_add = Cluster(cluster_id=cluster)
            db.session.add(cluster_to_add)
            db.session.commit()

        # Print where we are and increment counter
        print "Topics.txt seeding row", count_clusters

        count_clusters += 1
Example 2
def computeNeighborhood(p, setPoints, typeDistance, minPoints, eps):
    """Collect every point in setPoints that lies within eps of p, using
    the metric selected by typeDistance (0 = simple Euclidean,
    1 = Euclidean with relative speed). Return a Cluster if p has at
    least minPoints neighbors (i.e. p is a DBSCAN core point), else None."""
    pointsOfCluster = []
    for q in setPoints:
        if typeDistance == 0:
            if p.is_in_neighborhoodByEUSimple(q, eps):
                pointsOfCluster.append(q)
        elif typeDistance == 1:
            if p.is_in_neighborhoodEURelativeSpeed(q, eps):
                pointsOfCluster.append(q)
    # p is a core point only if its eps-neighborhood is dense enough.
    if len(pointsOfCluster) < minPoints:
        return None
    return Cluster(None, pointsOfCluster)
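
The Point neighborhood methods and the Cluster constructor are defined elsewhere in this project; below is a minimal sketch of the assumed interface, using a plain 2-D Euclidean distance (the Point and Cluster definitions and the sample data are hypothetical stand-ins, not the project's actual classes):

import math

class Point:
    # Hypothetical stand-in; the real class presumably also carries speed
    # data for the relative-speed metric.
    def __init__(self, pid, x, y):
        self.id, self.x, self.y = pid, x, y

    def is_in_neighborhoodByEUSimple(self, q, eps):
        # Simple Euclidean distance test against the eps radius.
        return math.hypot(self.x - q.x, self.y - q.y) <= eps

class Cluster:
    # Hypothetical stub matching the Cluster(id, points) calls in these examples.
    def __init__(self, cluster_id, points):
        self.id = cluster_id
        self.points = points

pts = [Point(0, 0.0, 0.0), Point(1, 0.5, 0.5), Point(2, 5.0, 5.0)]
neighborhood = computeNeighborhood(pts[0], pts, 0, 2, 1.0)
# -> Cluster holding pts[0] and pts[1]; pts[2] falls outside eps = 1.0.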
Example 3
def load_clusters():
    """Load info from topics.txt file into Cluster, TermCluster tables

    File format: R row id,Topic XXX,R column ID,word

        where XXX represents a number between 0-400
        R ids can be discarded during seeding 

    Source: topic clustering data from Neurosynth, converted to long format
    in R prior to seeding. 
    Notes: the words tracked in this clustering are not in perfect
    alignment with those tracked in studies_terms.txt. Approximately 2000 of the 
    terms in studies_terms have a topical cluster, the remaining ~1000 do not.
    This number could be improved by stemming. Many of the words not tracked
    in clusters are multi-word phrases."""

    # Delete whatever's in the db already
    Cluster.query.delete()
    TermCluster.query.delete()

    count_clusters = 0
    topics_fileobj = open('seed_data/topics.csv')

    for row in topics_fileobj:

        row = row.rstrip().split(',')

        # Parse the row into the appropriate data types for seeding
        cluster = int(row[1][-3:])
        word = row[3].strip()

        # Check if word is in our list of key terms. If it is, add to
        # TermCluster table to allow for lookup later (see model.py for TODO)

        if Term.check_for_term(word):
            term_cluster_to_add = TermCluster(word=word, cluster_id=cluster)
            db.session.add(term_cluster_to_add)
            db.session.commit()

        # Check if a cluster is in our list of clusters. If it's not, add it.
        if not Cluster.check_for_cluster(cluster):
            cluster_to_add = Cluster(cluster_id=cluster)
            db.session.add(cluster_to_add)
            db.session.commit()

        # Print where we are and increment counter
        print "Topics.txt seeding row", count_clusters

        count_clusters += 1

    topics_fileobj.close()
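
Term.check_for_term and Cluster.check_for_cluster live in model.py; here is a plausible sketch of such helpers, assuming Flask-SQLAlchemy models keyed the way the seeding code uses them (the table and column definitions are guesses for illustration):

class Term(db.Model):
    __tablename__ = 'terms'
    word = db.Column(db.String, primary_key=True)

    @classmethod
    def check_for_term(cls, word):
        # True if the word is already tracked in the terms table.
        return cls.query.get(word) is not None

class Cluster(db.Model):
    __tablename__ = 'clusters'
    cluster_id = db.Column(db.Integer, primary_key=True)

    @classmethod
    def check_for_cluster(cls, cluster_id):
        # True if this cluster id has already been seeded.
        return cls.query.get(cluster_id) is not None

One design note on the loop above: it commits once per matching row. Accumulating the adds and committing a single time after the loop would cut seeding time substantially without changing the result.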
Example 4
def dbscan(setPoints, eps, minPoints):
    """Classic DBSCAN: partition setPoints into density-based clusters
    and noise points. Returns [clusters, noises, IDs], where IDs lists
    the id of every input point."""
    clusters = []
    noises = []
    IDs = []
    classifications = {}
    for pt in setPoints:
        IDs.append(pt.id)
        classifications[pt.id] = 0  # 0 marks a point as not yet visited

    for pt in setPoints:
        if classifications[pt.id] == 0:
            points = _expand_cluster(setPoints, pt, classifications, noises,
                                     eps, minPoints)
            if points:
                clusters.append(Cluster(None, points))
    # s=""
    # for p in noises:
    #     s+=p.toString()+' '
    # print(s)
    # s=""
    # for c in clusters:
    #     s += c.toString()+'\n'
    # print(s)
    return [clusters, noises, IDs]
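
The helper _expand_cluster is not shown in this snippet; here is a plausible sketch of it, assuming the textbook DBSCAN expansion step, the computeNeighborhood function from Example 2, and hypothetical status codes (0 = unvisited, 1 = clustered, -1 = noise):

def _expand_cluster(setPoints, p, classifications, noises, eps, minPoints):
    # Try to grow a cluster outward from p; return its member points,
    # or None if p is not a core point.
    neighborhood = computeNeighborhood(p, setPoints, 0, minPoints, eps)
    if neighborhood is None:
        classifications[p.id] = -1          # tentatively noise
        noises.append(p)
        return None
    members = list(neighborhood.points)
    for m in members:
        classifications[m.id] = 1
    queue = list(members)
    while queue:
        q = queue.pop()
        sub = computeNeighborhood(q, setPoints, 0, minPoints, eps)
        if sub is None:
            continue                        # q is only a border point
        for r in sub.points:
            if classifications[r.id] == 0:      # unvisited: adopt and expand
                queue.append(r)
                members.append(r)
            elif classifications[r.id] == -1:   # was noise: reclaim as border
                members.append(r)
                noises.remove(r)
            classifications[r.id] = 1
    return members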
Example 5
import sys
sys.path.append("../src")
from simulator import Simulator
from model import Cluster

#import stacktracer
#stacktracer.trace_start("trace.html",interval=5,auto=True) # Set auto flag to always update file!

model = Cluster(2)

sim = Simulator(model)
sim.setVerbose(None)
#sim.setTerminationTime(10.0)
sim.setStateSaving("custom")
sim.simulate()
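
This driver appears to follow the PythonPDEVS Simulator API: setVerbose(None) routes the verbose trace to stdout, setStateSaving("custom") saves state snapshots via the states' own copy methods, and simulate() runs until no events remain (or until the commented-out termination time, if re-enabled).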