Example #1
def create_cluster_city(city):
    # Fetch the cluster data for a city and dump it to "<city>_cluster.json".
    filename = city + "_cluster.json"
    with open(filename, "w") as f:
        f.write(json.dumps(get_cluster(city)))
Example #2
def attach(args):
    try:
        for line in get_cluster(args.name).node(args.node).attach():
            sys.stdout.write(line)
            sys.stdout.flush()
    except KeyboardInterrupt:
        pass
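These `args`-taking handlers are argparse subcommand callbacks. A minimal sketch of how a handler such as attach might be registered as a subcommand (the parser layout below is an assumption, not taken from the source):

import argparse
import sys

def build_parser():
    parser = argparse.ArgumentParser(prog="cluster")
    subparsers = parser.add_subparsers()
    attach_parser = subparsers.add_parser("attach", help="stream a node's output")
    attach_parser.add_argument("name", help="cluster name")
    attach_parser.add_argument("node", help="node identifier")
    attach_parser.set_defaults(func=attach)  # dispatch to the handler above
    return parser

if __name__ == "__main__":
    args = build_parser().parse_args()
    args.func(args)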
Example #3
def get_clusters(city):
    all_clusters = get_cluster(city, cluster_size=8)
    clusters = []
    for entry in all_clusters:

        cluster = entry["data"]
        geolocation = ""
        poi_index = 1
        points = []
        for s in cluster:
            p = PointOfInterest(s['duration'] * 60, s['score'])
            points.append(p)
            # '%7C' is the URL-encoded '|' separator expected by the distance service.
            geolocation += str(s["geolocation"]) + "%7C"
            poi_index += 1
            if poi_index > 8:  # cap at 8 points of interest per cluster
                break

        # Drop the trailing separator ('%7C' is three characters).
        geolocation = geolocation[:-3]

        distancematrix = get_distances(geolocation)

        cluster = Cluster(points, distancematrix)
        clusters.append(cluster)

    return clusters
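The PointOfInterest and Cluster types are not shown in these examples; a hypothetical sketch of the minimal shapes the call sites above assume:

from dataclasses import dataclass
from typing import List

@dataclass
class PointOfInterest:
    duration: float  # as passed by the caller (s['duration'] * 60)
    score: float

@dataclass
class Cluster:
    points: List[PointOfInterest]
    distance_matrix: List[List[float]]  # pairwise drive times between points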
Example #4
def create_problem_file(city):

    all_clusters = get_cluster(city, cluster_size=8)
    problem_text_mids = []

    for entry in all_clusters:

        cluster = entry["data"]
        problem_text_mid = ""
        object_text_def = """
		(:objects
        	user1
        	waypoint"""
        geolocation = ""
        poi_index = 1
        for s in cluster:
            object_text_def += " waypoint" + str(poi_index)
            problem_text_mid += "\t\t(waypoint waypoint" + str(
                poi_index) + ")\n"
            problem_text_mid += "\t\t(= (score waypoint" + str(
                poi_index) + ") " + str(s["score"]) + ")\n"
            problem_text_mid += "\t\t(= (duration waypoint" + str(
                poi_index) + ") " + str(s["duration"] * 60) + ")\n"
            geolocation += str(s["geolocation"]) + "%7C"
            poi_index += 1
            if poi_index > 8:
                break
        object_text_def += "\n)"

        # Drop the trailing '%7C' separator (three characters).
        geolocation = geolocation[:-3]

        distancematrix = get_distances(geolocation)

        # Drive times are written in both directions using the same matrix
        # entry, so a symmetric distance matrix is assumed.
        for i in range(1, poi_index):
            for j in range(i + 1, poi_index):
                problem_text_mid += "\t\t(= (drive-time waypoint" + str(
                    i) + " waypoint" + str(j) + ") " + str(
                        distancematrix[i - 1][j - 1]) + " )\n"
                problem_text_mid += "\t\t(= (drive-time waypoint" + str(
                    j) + " waypoint" + str(i) + ") " + str(
                        distancematrix[i - 1][j - 1]) + " )\n"

        for i in range(1, poi_index):
            problem_text_mid += "\t\t(not ( visited user1 waypoint" + str(
                i) + ") )\n"

        problem_text_mid += "\t\t(user-at user1 waypoint1) ) \n\n\t(:goal\n\t\t(and\n"

        for i in range(1, poi_index):
            problem_text_mid += "\t\t\t(visited user1 waypoint" + str(
                i) + ")\n"

        # problem_text_mid1, problem_text_start and problem_text_end are
        # module-level PDDL template fragments not shown in this example.
        problem_text_mids.append(object_text_def + problem_text_mid1 +
                                 problem_text_mid)

    all_file_data = []
    for i in problem_text_mids:
        all_file_data.append(problem_text_start + i + problem_text_end)

    return all_file_data
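A hypothetical driver for this generator (the file-naming scheme is an assumption, not from the source):

def write_problem_files(city):
    # Write each cluster's PDDL problem definition to its own file,
    # e.g. "paris_problem_0.pddl" for the first cluster.
    for i, problem_text in enumerate(create_problem_file(city)):
        filename = "%s_problem_%d.pddl" % (city, i)
        with open(filename, "w") as f:
            f.write(problem_text)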
Example #5
def get_cluster(user_id):
    nearby = json.loads(Event.get_group(user_id))
    # Use a distinct loop variable so the user_id parameter is not shadowed.
    events = [Event.objects(user_id=uid).first() for uid in nearby]
    locs = [e.loc for e in events]
    group = zip(nearby, locs)
    cl = cluster.get_cluster(group)
    return json.dumps([uid for (uid, loc) in cl])
Example #6
def update_ref_cluster_dict(self):
    # Python 2 snippet (dict.iteritems and the print statement).
    self.ref_cluster_dict = get_cluster(self.ref_file_name,
                                        self.maf_upper_bound,
                                        self.maf_lower_bound)
    for maf_num, cluster_list in self.ref_cluster_dict.iteritems():
        for cluster_dict in cluster_list:
            for pos, ref in cluster_dict.iteritems():
                self.cluster_pos_dict[pos] = ref
    print "cluster_pos_dict: ", len(self.cluster_pos_dict)
Example #7
def logs(args):
    output = get_cluster(args.name).node(args.node).logs(stream=args.stream)
    if isinstance(output, basestring):
        print output
    else:
        try:
            for line in output:
                sys.stdout.write(line)
                sys.stdout.flush()
        except KeyboardInterrupt:
            pass
Example #8
def main():
    parser = argparse.ArgumentParser(
        description='Start a TensorFlow parameter server.')
    parser.add_argument('num_ps', type=int, help='number of parameter servers')
    parser.add_argument('num_workers', type=int, help='number of workers')
    parser.add_argument('ps_index', type=int, help='ps index')

    args = parser.parse_args()
    print(args)

    print(f"{args.num_ps} ps and {args.num_workers} workers.")

    cluster = get_cluster(args.num_ps, args.num_workers)
    print(cluster)

    server = tf.train.Server(cluster, job_name="ps", task_index=args.ps_index)

    server.join()
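get_cluster here presumably returns a tf.train.ClusterSpec; a minimal sketch of what such a helper might look like (the host/port layout is an assumption, not from the source):

import tensorflow as tf

def get_cluster(num_ps, num_workers):
    # Hypothetical helper: one task per port, parameter servers first,
    # then workers. The localhost addresses are placeholders.
    ps_hosts = ["localhost:%d" % (2222 + i) for i in range(num_ps)]
    worker_hosts = ["localhost:%d" % (2222 + num_ps + i)
                    for i in range(num_workers)]
    return tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})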
Example #9
def test(queries=list(), location='./test'):
    """
    Test your system with the input. For each input, generate a list of IDs that is returned
    :param queries: list of image-IDs. Each element is assumed to be an entry in the test set. Hence, the image
    with id <id> is located on my computer at './test/pics/<id>.jpg'. Make sure this is the file you work with...
    :param location: The location of the test data folder hierarchy
    :return: a dictionary with keys equal to the images in the queries - list, and values a list of image-IDs
    retrieved for that input
    """

    # ##### The following is an example implementation -- that would lead to 0 points in the evaluation :-)
    my_return_dict = {}

    # Load the dictionary with all training files. This is just to get a hold of which
    # IDs are there; will choose randomly among them
    # training_labels = pickle.load(open('./train/pickle/combined.pickle', 'rb'))
    # training_labels = list(training_labels.keys())
    count = 0
    tot = len(queries)
    cluster = get_cluster()
    model = get_prediction_model()
    for query in queries:

        # This is the image. Just opening it here for the fun of it; not used later
        # query_image = Image.open(location + '/pics/' + query + '.jpg')
        # query_image.show()

        # Generate a random list of 50 entries
        # cluster = [training_labels[random.randint(0, len(training_labels) - 1)] for idx in range(50)]
        image_embedding = embed_image(location + '/pics/' + query + '.jpg')
        trained_image_embedding = predict_vector_on_model(
            image_embedding, model)
        cluster_filenames = compare_to_cluster(trained_image_embedding,
                                               cluster, 50)
        my_return_dict[query] = cluster_filenames
        print_progress(count, tot, prefix="Predicting images")
        count += 1
    return my_return_dict
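A hypothetical invocation (the IDs are placeholders, not from the source):

results = test(queries=['12345', '67890'], location='./test')
for query_id, retrieved in results.items():
    print(query_id, retrieved[:5])  # first five retrieved image IDs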
Example #10
def stop(args):
    get_cluster(args.name).node(args.node).stop()
Example #11
def cluster_nodes(args):
    print cluster_to_str(get_cluster(args.name))
Example #12
def cluster_info(args):
    if args.name is not None:
        print get_cluster(args.name)
    else:
        print '\n'.join([str(cluster) for cluster in get_clusters()])
Example #13
def upgrade_node(args):
    get_cluster(args.name).upgrade_node(args.node, version=args.version)
Example #14
def remove_node(args):
    get_cluster(args.name).remove_node(args.node)
Example #15
def heal(args):
    get_cluster(args.name).network.heal(args.local, args.remote)
Example #16
def drop(args):
    get_cluster(args.name).network.drop(args.node, args.probability,
                                        args.correlation)
Example #17
def recover(args):
    get_cluster(args.name).node(args.node).recover()
Example #18
def app():
    # basic setting
    basic_info_cols = ["Status", "Phases", "Study Type", "Study Results",          "Trial_Duration_Category",
                       "INDUSTRY", "NIH", "OTHER FUND SOURCE", "U.S. FED"]
    participants_info_cols = ["Age", "Gender","Enrollment_Category"]
    study_design_cols = ["ALLOCATION", "INTERVENTION MODEL", "PRIMARY PURPOSE",
                        "OBSERVATIONAL MODEL", "TIME PERSPECTIVE",
                        "PARTICIPANT", "CARE PROVIDER","INVESTIGATOR", "OUTCOMES ASSESSOR"]
    intervention_cols = ["DRUG", "PROCEDURE", "OTHER INTERVENTIONS TYPE", "DEVICE", "BIOLOGICAL", "DIAGNOSTIC TEST",
                        "DIETARY SUPPLEMENT", "GENETIC", "COMBINATION PRODUCT", "BEHAVIORAL", "RADIATION"]

    feature_set = {
        "basic info": basic_info_cols,
        "participants": participants_info_cols,
        "study design": study_design_cols,
        "intervention": intervention_cols
    }
    
    
    # load data
    df = cluster.get_data_for_cluster()
    
    st.sidebar.subheader("Cluster options:")
    scope = st.sidebar.selectbox("Choose scope:", options=["Worldwide", "US"])
    def attr_show(x):
        return x.title()
    attr = st.sidebar.selectbox("Choose which set of attributes to cluster by:",
                                options=[
                                    "basic info",
                                    "participants",
                                    "study design",
                                    "intervention",
                                ],
                                format_func=attr_show)
    n_clusters = st.sidebar.selectbox("Choose the number of clusters:",
                                      options=["Auto", 3, 4, 5, 6, 7])
    st.sidebar.subheader("Display options:")
    display = st.sidebar.selectbox("Choose which attribute to display:",
                                   options=feature_set[attr])
    show_centroid = st.sidebar.checkbox("Show centroid of each cluster")
    show_cluster_table = st.sidebar.checkbox("Show trials with predicted cluster")
    
    # filter df
    if scope == "US":
        df = df[df["Location_Country"] == "UNITED STATES OF AMERICA"]
    df_feature = cluster.choose_feature(df=df, feature_type=attr)
    
    # implement cluster
    if n_clusters == "Auto":
        km = cluster.get_cluster(df=df_feature)
    else:
        km = KModes(n_clusters=n_clusters, init="Huang", n_init=1, verbose=0, random_state=1)
    df_with_cluster, cluster_centroids, cluster_labels = cluster.get_clustered_data(km=km, df=df_feature)
    
    # page layout
    st.title('Can we divide trials into several groups?')
    
    st.header(
        """
        We use the K-Mode algorithm to classify COVID-19 clinical trials into different groups.
        """
    )
    
    with st.beta_expander("Click here to expand more details about our cluster model"):
        st.subheader("K-Mode cluster introduction:")
        st.markdown(
            """
            We use K-Mode algorithm for our cluster model beacuse our dataset is fully composed of catgorical data, and we cannot apply K-Mean since "distance" is meaningless for categorical dataset. \n
            What K-Mode do is basically the same as K-Mean, except it choose the mode in each feature as the center instead of mean. For example, if our dataset is students from different contries, for the initial cluster, in cluster 1, 5 students come from US, 3 from China, 2 from Japan, so the centroid of this cluster is US. The rest steps of K-Mode are exactly the same as K-mean. \n
            To implement this algorithm, we use the package `kmodes`, and you can refer to this [site](https://pypi.org/project/kmodes/) for more information.
            """
        )
    # plot
    st.subheader(f'Count of trials for each category of the {display} attribute in each cluster')
    plot = cluster.plot_cluster(df_with_cluster=df_with_cluster, feature=display)
    plot.update_layout(margin={"r": 0, "t": 10, "l": 0, "b": 0},
                               height=400,
                               plot_bgcolor='rgba(0,0,0,0)')
    st.plotly_chart(plot, use_container_width=True)
    
    # centroids
    if show_centroid:
        st.subheader("Centroid of each cluster")
        st.write(cluster_centroids)
    
    # cluster table
    if show_cluster_table:
        st.subheader("Table of trials with preidcted cluster")
        st.write(df_with_cluster)
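For reference, a minimal sketch of fitting the kmodes package directly (synthetic rows; the attribute values are placeholders, not from the source):

import numpy as np
from kmodes.kmodes import KModes

# Synthetic categorical data: each row is a trial, each column an attribute.
data = np.array([
    ["Completed", "Phase 2", "Interventional"],
    ["Recruiting", "Phase 3", "Observational"],
    ["Completed", "Phase 2", "Interventional"],
    ["Recruiting", "Phase 1", "Observational"],
])

km = KModes(n_clusters=2, init="Huang", n_init=1, verbose=0, random_state=1)
labels = km.fit_predict(data)    # cluster index for each row
print(labels)
print(km.cluster_centroids_)     # modal value of each attribute per cluster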
Example #19
def restore(args):
    get_cluster(args.name).network.restore(args.node)
Example #20
def corrupt(args):
    get_cluster(args.name).network.corrupt(args.node, args.probability)
Example #21
def duplicate(args):
    get_cluster(args.name).network.duplicate(args.node, args.probability,
                                             args.correlation)
Example #22
def reorder(args):
    get_cluster(args.name).network.reorder(args.node, args.probability,
                                           args.correlation)
Example #23
def start(args):
    get_cluster(args.name).node(args.node).start()
Example #24
def kill(args):
    get_cluster(args.name).node(args.node).kill()
Example #25
def delay(args):
    get_cluster(args.name).network.delay(args.node, args.latency, args.jitter,
                                         args.correlation, args.distribution)
Example #26
def restart(args):
    get_cluster(args.name).node(args.node).restart()
Example #27
def teardown(args):
    cluster = get_cluster(args.name)
    cluster.teardown()
    if args.delete:
        cluster.cleanup()
Example #28
def stress(args):
    get_cluster(args.name).stress(args.node, args.timeout, args.cpu, args.io,
                                  args.memory, args.hdd)
Example #29
def add_node(args):
    get_cluster(args.name).add_node(*args.config,
                                    version=args.version,
                                    debug=args.debug,
                                    trace=args.trace)
Example #30
def app():
    # basic setting
    basic_info_cols = [
        "Status", "Phases", "Study Type", "Study Results",
        "Trial_Duration_Category", "INDUSTRY", "NIH", "OTHER FUND SOURCE",
        "U.S. FED"
    ]
    participants_info_cols = ["Age", "Gender", "Enrollment_Category"]
    study_design_cols = [
        "ALLOCATION", "INTERVENTION MODEL", "PRIMARY PURPOSE",
        "OBSERVATIONAL MODEL", "TIME PERSPECTIVE", "PARTICIPANT",
        "CARE PROVIDER", "INVESTIGATOR", "OUTCOMES ASSESSOR"
    ]
    intervention_cols = [
        "DRUG", "PROCEDURE", "OTHER INTERVENTIONS TYPE", "DEVICE",
        "BIOLOGICAL", "DIAGNOSTIC TEST", "DIETARY SUPPLEMENT", "GENETIC",
        "COMBINATION PRODUCT", "BEHAVIORAL", "RADIATION"
    ]

    feature_set = {
        "basic info": basic_info_cols,
        "participants": participants_info_cols,
        "study design": study_design_cols,
        "intervention": intervention_cols
    }

    # load data
    df = cluster.get_data_for_cluster()

    st.sidebar.subheader("Cluster options:")
    scope = st.sidebar.selectbox("Choose scope:", options=["Worldwide", "US"])

    def attr_show(x):
        return x.title()

    attr = st.sidebar.selectbox(
        "Choose which set of attributes to cluster by:",
        options=[
            "basic info",
            "participants",
            "study design",
            "intervention",
        ],
        format_func=attr_show)
    n_clusters = st.sidebar.selectbox("Choose the number of clusters:",
                                      options=["Auto", 3, 4, 5, 6, 7])
    st.sidebar.subheader("Display options:")
    display = st.sidebar.selectbox(
        "Choose which attribute to display:",
        options=feature_set[attr])
    show_centroid = st.sidebar.checkbox("Show centroid of each cluster")
    show_cluster_table = st.sidebar.checkbox(
        "Show trials with predicted cluster")

    # filter df
    if scope == "US":
        df = df[df["Location_Country"] == "UNITED STATES OF AMERICA"]
    df_feature = cluster.choose_feature(df=df, feature_type=attr)

    # implement cluster
    if n_clusters == "Auto":
        km = cluster.get_cluster(df=df_feature)
    else:
        km = KModes(n_clusters=n_clusters,
                    init="Huang",
                    n_init=1,
                    verbose=0,
                    random_state=1)
    df_with_cluster, cluster_centroids, cluster_labels = cluster.get_clustered_data(
        km=km, df=df_feature)

    # page layout
    st.title('Can we divide trials into several groups?')

    st.header("""
        We use K-Mode algorithm to classify COVID-19 clinical trials into different groups.
        """)

    with st.beta_expander(
            "Click here to expand more details about our cluster model"):
        st.subheader("Say something about the model:")
        st.markdown("""
            say something here...
            """)
    # plot
    st.subheader(
        f'Count of trials for each category of the {display} attribute in each cluster'
    )
    plot = cluster.plot_cluster(df_with_cluster=df_with_cluster,
                                feature=display)
    plot.update_layout(margin={
        "r": 0,
        "t": 10,
        "l": 0,
        "b": 0
    },
                       height=400,
                       plot_bgcolor='rgba(0,0,0,0)')
    st.plotly_chart(plot, use_container_width=True)

    st.write("")

    # centroids
    st.subheader("Centroid of each cluster")
    if show_centroid:
        st.write(cluster_centroids)

    st.write("")

    # cluster table
    st.subheader("Table of trials with preidcted cluster")
    if show_cluster_table:
        st.write(df_with_cluster)
Example #31
def destress(args):
    get_cluster(args.name).destress(args.node)