Code example #1
def process_single_day(day, data_dir):
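    # Load the day's stop and schedule metadata, then process its vehicle-position CSV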
    day_t = arrow.get(day)
    fpath = os.path.join(data_dir, 'vehicle_positions', day + '.csv')
    stops = get_metadata(day_t, 'stops', data_dir)
    schedule = get_metadata(day_t, 'schedule', data_dir)
    print('Processing file:', fpath)
    return process_day(pd.read_csv(fpath), stops, schedule)
Code example #2
def fetch_image_meta(paths=None):
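    # Query the master DB, restricted to the given image paths when provided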
    if paths:
        meta = get_metadata(f={'path': {'$in': paths}}, master_db=True)
    else:
        meta = get_metadata(master_db=True)

    meta = {m['imageName']: 1 for m in meta}
    return meta
Code example #3
File: api.py Project: tgoodyear/bedrock-core
    def put(self):
        '''
        Add a new analytic via file upload. This is a security risk.
        '''
        try:
            time = datetime.now()
            # make the id more meaningful
            file = request.files['file']
            filename = secure_filename(file.filename)
            name, ext = splitext(filename)
            if ext not in ALLOWED_EXTENSIONS:
                return 'This filetype is not supported.', 415

            # save the file
            analytic_id = name + str(time.year) + str(time.month) + str(
                time.day) + str(time.hour) + str(time.minute) + str(
                    time.second)
            filepath = ANALYTICS_OPALS + analytic_id + '.py'
            file.save(filepath)

            # get the metadata from the file
            metadata = utils.get_metadata(analytic_id)
            metadata['analytic_id'] = analytic_id

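            # store the metadata and return it without the Mongo _id key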
            _, col = analytics_collection()
            col.insert(metadata)
            meta = drop_id_key(metadata)
        except Exception:
            tb = traceback.format_exc()
            return tb, 406

        return meta, 201
Code example #4
    def track_changed(self):
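        # Do nothing unless scrobbling is on and this is the active playback device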
        if not self.on:
            return
        if not bool(lib.SpPlaybackIsActiveDevice()):
            return
        self.pause()
        # Scrobble the last song only if it has played for more than half
        # of its duration or for more than 4 minutes
        if self.metadata and self.play_cumul > min(
                self.metadata["duration"] / 2000, 240):
            self.lastfm_network.scrobble(
                artist=self.metadata["artist_name"],
                title=self.metadata["track_name"],
                timestamp=int(self.metadata["time_on"]),
                album=self.metadata["album_name"],
                duration=(self.metadata["duration"] / 1000))
            print "LastFM: scrobbled track " + self.metadata[
                "track_name"] + " - " + self.metadata["artist_name"]

        # Update now playing song
        self.play_cumul = 0
        self.play()
        self.metadata = get_metadata()
        self.metadata["time_on"] = time.time()
        self.lastfm_network.update_now_playing(
            artist=self.metadata["artist_name"],
            title=self.metadata["track_name"],
            album=self.metadata["album_name"],
            duration=int(self.metadata["duration"] / 1000))
Code example #5
    def get_data_matrix(cls,
                        feature,
                        label=None,
                        unlabelled=False,
                        ignore_metadata=False):
        min_max_scaler = MinMaxScaler()

        f = {}
        if label:
            label_images = utils.filter_images(label)
            f = {'path': {'$in': label_images}}

        # Build and scale feature matrix
        images, feature_space = utils.get_all_vectors(feature,
                                                      f=f,
                                                      unlabelled_db=unlabelled)
        feature_space = min_max_scaler.fit_transform(feature_space)
        # Not including metadata boosts accuracy of Set 2
        # Including metadata boosts accuracy of Set 1
        if ignore_metadata:
            meta = utils.get_metadata(unlabelled_db=unlabelled)
            # Mapping between image file path name and the metadata
            meta = {m['path']: m for m in meta}
            return images, meta, feature_space

        # Build and scale metadata matrix
        meta, metadata_space = cls.get_metadata_space(images,
                                                      unlabelled_db=unlabelled)
        metadata_space = min_max_scaler.fit_transform(metadata_space)

        # Column stack them
        data_matrix = np.c_[feature_space, metadata_space]

        return images, meta, data_matrix
Code example #6
def metadata():
    print(
        f'The elastic server IP address is {args.elasticIp} and the port is {args.elasticPort}'
    )
    metadata = get_metadata(args.elasticIp, args.elasticPort)
    # print(metadata)
    return jsonify(metadata)
Code example #7
def get_unlabelled_data(feature):
    u_images, u_vectors = utils.get_all_vectors(feature, unlabelled_db=True)

    # Get metadata
    meta = utils.get_metadata(unlabelled_db=True)
    meta = {m['path']: m for m in meta}

    return u_images, meta, u_vectors
Code example #8
File: samsung.py Project: tatterdemalion/rawganize
def get_filename(path, ext):
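    # Name the file after its creation timestamp plus an MD5 of its 101st line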
    with open(path, 'r') as f:
        line = f.readlines()[100]
        md5 = hashlib.md5(line).hexdigest()
        f.seek(0)
        created = get_metadata(f)['created']
        timestamp = int(time.mktime(created.timetuple()))
        filename = '%s-%s' % (timestamp, md5) + '.' + ext
    return filename, created
Code example #9
def get_labelled_data(feature):
    # Get labelled images
    l_images, feature_space = utils.get_all_vectors(feature)

    # Get metadata
    meta = utils.get_metadata()
    meta = {m['path']: m for m in meta}

    return l_images, meta, feature_space
Code example #10
def process_range(start, end, data_dir):
    dates = date_range(arrow.get(start), arrow.get(end))
    print('Processing dates from {} to {}'.format(start, end))

    path = os.path.join(data_dir, 'vehicle_positions') + '/{}.csv'
    paths = map(lambda day: (path.format(day), arrow.get(day)), dates)

    results = []
    for fpath, day in paths:
        stops = get_metadata(day, 'stops', data_dir)
        schedule = get_metadata(day, 'schedule', data_dir)
        now = arrow.now()
        print('Processing file:', fpath)
        df = process_day(pd.read_csv(fpath), stops, schedule)
        results.append(df)
        print('Processed {} in {}s'.format(day, (arrow.now() - now).seconds))

    combined = pd.concat(results)
    combined.to_csv('{}_{}.csv'.format(start, end), index=False)
Code example #11
File: narps.py Project: rotemb9/narps-2
    def __init__(self,
                 basedir,
                 metadata_file=None,
                 verbose=False,
                 overwrite=False,
                 dataurl=None,
                 testing=False):
        self.basedir = basedir
        self.dirs = NarpsDirs(basedir, dataurl=dataurl, testing=testing)
        self.verbose = verbose
        self.teams = {}
        self.overwrite = overwrite
        self.started_at = datetime.datetime.now()
        self.testing = testing

        # create the full mask image if it doesn't already exist
        if not os.path.exists(self.dirs.full_mask_img):
            print('making full image mask')
            self.mk_full_mask_img(self.dirs)
        assert os.path.exists(self.dirs.full_mask_img)

        # get input dirs for orig data
        self.image_jsons = None
        self.input_dirs = self.get_input_dirs(self.dirs)

        # check images for each team
        self.complete_image_sets = {}
        self.get_orig_images(self.dirs)
        for imgtype in ['thresh', 'unthresh']:
            log_to_file(
                self.dirs.logfile,
                'found %d teams with complete original %s datasets' %
                (len(self.complete_image_sets[imgtype]), imgtype))

        # set up metadata
        if metadata_file is None:
            self.metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'analysis_pipelines_for_analysis.xlsx')
        else:
            self.metadata_file = metadata_file

        self.metadata = get_metadata(self.metadata_file)

        self.hypothesis_metadata = pandas.DataFrame(
            columns=['teamID', 'hyp', 'n_na', 'n_zero'])

        self.all_maps = {
            'thresh': {
                'resampled': None
            },
            'unthresh': {
                'resampled': None
            }
        }
        self.rectified_list = []
Code example #12
File: climgur.py Project: csettles/climgur
    def upload_album(self):
        album_data = get_metadata(
            True) if self.args.metadata else self.metadata
        album = self.client.create_album(album_data)
        print('Created album named "{}"'.format(album_data.get('title')))
        self.log_upload(album)

        album_id = album['id'] if self.client.auth else album['deletehash']

        # get all images in the folder with approved file extensions
        files = [glob(os.path.join(self.args.path, '*' + ext))
                 for ext in file_extensions]
        files = sum(files, [])  # ugly way to flatten list

        for f in files:
            print('Uploading {}'.format(os.path.basename(f)))
            img_data = get_metadata() if self.args.metadata else dict()
            self.upload_pic(f, img_data, album_id)

        return album['id']  # return album if more data is needed
Code example #13
def get_metadata_space(images):
    meta = get_metadata(master_db=True)
    # Mapping between image file path name and the metadata
    meta = {m['path']: m for m in meta}
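    # One numeric row per image: age plus categorical fields encoded via the mapping dict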
    space = np.array([[
        meta[i]['age'], mapping[meta[i]['gender']],
        mapping[meta[i]['skinColor']], mapping[meta[i]["accessories"]],
        meta[i]["nailPolish"], meta[i]["irregularities"]
    ] for i in images])

    return meta, space
Code example #14
File: app.py Project: CoserU/Explainable-RecSys
def ranking_item():
    """
    Rank the items based on their predicted ratings

    Outputs:
    -------
    : predicted ratings, inference time, ordered item ids, item metadata
    """

    if request.method == 'POST':
        ids = request.json
        user_id = int(ids['uid'])
        item_ids = ids['iids']

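        # Pair the single user id with each of the 100 candidate items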
        user_ids = np.full(100, user_id)
        item_ids = item_ids[1:-1].split(',')
        item_ids = np.array(item_ids).astype(int)

        texts_u = []
        texts_i = []
        for i in user_ids:
            texts_u.append(u_text[i].tolist())
        for j in item_ids:
            texts_i.append(i_text[j].tolist())

        user_ids = user_ids.reshape(-1, 1)
        item_ids = item_ids.reshape(-1, 1)

        # Feed the inputs to the Tensorflow Serving model
        res, time_dif = tf_serving(texts_u, texts_i, user_ids, item_ids)

        # Get the ranking results
        rating = np.array(res['final_rating/add_1:0']).reshape(-1)
        order = np.argsort(rating)[::-1]
        item_ids_new = item_ids.reshape(-1)[order]
        rating_new = rating[order]

        # Prepare the metadata for 10 suggested items
        des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
            df_meta, item_ids_new, num_top=10)

        return json.dumps({
            'rating': rating_new.tolist(),
            'infertime': time_dif.total_seconds(),
            'item_ids': item_ids_new.tolist(),
            'des_meta': des_meta,
            'title_meta': title_meta,
            'price_meta': price_meta,
            'imurl_meta': imurl_meta,
            'categ_meta': categ_meta
        })

    else:
        return render_template('candidate.html')
Code example #15
def read_metadata(pdf_path, document_uuid, document_name):
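    # Collect the PDF metadata into a result dict; failures go to Sentry (returning None)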
    try:
        metadata_dict = get_metadata(pdf_path)
        return dict(
            original_document=pdf_path,
            metadata=metadata_dict,
            document_uuid=document_uuid,
            document_name=document_name,
        )
    except Exception as e:
        sentry_client(e)
Code example #16
def load_data(num_chord_comp=5, num_grain_comp=5):
    # Grain data
    grain_pca = np.load(stats_pca_path()+'grain_grain_pca_scores.npy')
    # Load chord data
    chords = load_chords(cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True)
    # Load labels
    metadata, class_map, subclass_map = get_metadata(stats_files())
    classes = np.array([int(x['class_num']) for x in metadata])
    # subclasses = np.array([x['subclass_num'] for x in metadata])
    #          xs                           x                          y
    return (grain_pca[:, :num_grain_comp], chords_pca[:, :num_chord_comp], classes)
Code example #17
    def get_metadata_space(cls, images, unlabelled_db=False):
        meta = utils.get_metadata(unlabelled_db=unlabelled_db)
        # Mapping between image file path name and the metadata
        meta = {m['path']: m for m in meta}
        space = np.array([[
            meta[i]['age'], cls.mapping[meta[i]['gender']],
            cls.mapping[meta[i]['skinColor']],
            cls.mapping[meta[i]["accessories"]], meta[i]["nailPolish"],
            meta[i]["irregularities"]
        ] for i in images])

        return meta, space
Code example #18
def get_articles(input):
    articles = utils.get_metadata(
        input, utils.image_ext, lambda:
        [Article(id=-1, chain_id='', filename='', title='', images=[])],
        lambda x: x[0].images,
        lambda id, chain_id, filename: ArticleImage(id=id,
                                                    chain_id=str(uuid.uuid4()),
                                                    filename=filename,
                                                    regions=[],
                                                    title='',
                                                    page=0,
                                                    idx_on_page=0))
    # ItJim: ^this part didn't work because it was lacking parameters.
    return articles
Code example #19
File: task3.py Project: ankit94/11K-Hands
def get_full_matrix(feature, unlabelled=False, master=False):
    # Get labelled images
    images, data = get_all_vectors(feature,
                                   unlabelled_db=unlabelled,
                                   master_db=master)

    # Get metadata
    meta = get_metadata(unlabelled_db=unlabelled, master_db=master)
    meta = {m['path']: m for m in meta}
    meta_space = np.array([[
        meta[i]['age'], mapping[meta[i]['gender']],
        mapping[meta[i]['skinColor']], mapping[meta[i]["accessories"]],
        meta[i]["nailPolish"], meta[i]["irregularities"]
    ] for i in images])

    return images, meta, np.c_[data, meta_space]
Code example #20
def function_create():
    with utils.AtomicRequest() as atomic:
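        # setting atomic.errors flags the request as failed (AtomicRequest presumably handles cleanup)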

        function_id = uuid.uuid4().hex

        atomic.driver_endpoint = driver_endpoint

        user, tenant = utils.get_headers(request)

        zip_file = utils.get_zip(request)
        zip_url = utils.upload_zip(function_id, zip_file)

        if not zip_url:
            atomic.errors = True
            return critical_error('Not able to store zip.')

        atomic.zip_url = zip_url

        metadata = utils.get_metadata(request)

        if not utils.validate_json(utils.build_schema, metadata):
            atomic.errors = True
            return bad_request("Error validating json.")

        tag = "{0}_{1}_{2}".format(tenant, user, metadata.get('name'))
        payload = {
            "memory": metadata.get('memory'),
            "tags": [tag],
            "runtime": metadata.get('runtime'),
            "zip_location": zip_url,
            "name": metadata.get('name')
        }

        image_id = utils.create_image(driver_endpoint, payload)
        atomic.image_id = image_id

        function = utils.create_function(tenant, user, function_id, image_id,
                                         zip_url, tag, metadata)

        if not function:
            atomic.errors = True
            return critical_error('Error building the function.')

        return Response(function_id, status=201)
Code example #21
File: main.py Project: WaseemTheDream/glass-present
    def post(self):
        drive_url = json.loads(self.request.body)['driveurl']
        logging.info("Received the drive url: %s", drive_url)
        drive_id = self.parse_url(drive_url)
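        # Look up an existing presentation for this Drive file; create one below if missing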

        presentation = \
            Presentation.query(Presentation.drive_id == drive_id).get()
        if presentation is None:
            presentation = Presentation(drive_id=drive_id)

        slides = get_metadata(drive_id)
        slides_str = json.dumps(slides)
        logging.info(slides_str)
        presentation.slides = slides_str
        presentation_id = presentation.put().id()

        self.response.write(json.dumps({
            'presentation_id': str(presentation_id),
        }))
Code example #22
def play(url, nid):
	utils.log('play: ' + urllib.quote(url))

	if nid == 'live':
		meta = utils.get_metadata(nid)

		# this usually means the live stream isn't currently active
		if 'error_msg' in meta:
			utils.log('cannot play stream: %s, %s' % (url, meta['error_msg']))
			utils.dialog_error(meta['error_msg'])
			return

	"""
	# XXX disabled as not currently working?

	# permission dance. if we're already logged in (have a valid cookie), no need to log in again
	perms = utils.get_perms(nid)

	if not perms:
		# login and recheck video permissions
		if not utils.wsbk_login():
			return

		perms = utils.get_perms(nid)
		if not perms:
			# we really mustn't have permission
			utils.log('no permission for video %s' % nid)
			utils.dialog_error('No permission to access this video. Check login details in plugin settings.')
			return
	"""

	(stream_url, meta) = utils.get_stream_url(nid)
	listitem = xbmcgui.ListItem(label=meta['title'], iconImage=meta['thumbnail_url'], thumbnailImage=meta['thumbnail_url'])

	utils.log("Playing stream: %s" % stream_url)

	try:
		xbmc.Player().play(stream_url, listitem)
	except:
		utils.dialog_error("Cannot play video")
Code example #23
def plot_y():
    fig, axes = plt.subplots(3, 4, sharex=True)
    fig_2, axes_2 = plt.subplots(3, figsize=(9, 10))

    for i, num_train_episodes in enumerate([500, 1000, 3000]):
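        # One row of histograms per training budget, one panel per algorithm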
        _, y = utils.get_metadata(num_train_episodes=num_train_episodes,
                                  artificial=False)

        for j in np.arange(y.shape[1]):
            ax = axes[i][j]
            ax.set_title(num_train_episodes)
            y_cur = y.iloc[:, j]
            sns.histplot(y_cur, ax=ax, stat="density", bins=8, palette="deep")
            ax.set_ylabel("Densidade")

        aux = y.values.T.flatten()
        aux = pd.DataFrame.from_dict({
            "Algoritmo":
            np.repeat(y.columns, y.shape[0]),
            "Converged":
            aux > 0.0
        })

        sns.countplot(x="Algoritmo",
                      hue="Converged",
                      data=aux,
                      ax=axes_2[i],
                      palette="deep")

        if i != 2:
            axes_2[i].set_xlabel(None)

        ax = axes_2[i]
        ax.set_title(f"Episodios = {num_train_episodes}")
        ax.set_ylabel("Frequencia")
        legend_labels, _ = ax.get_legend_handles_labels()
        ax.legend(legend_labels, ["Não", "Sim"], title="Convergiu?")

    plt.tight_layout()
    plt.show()
Code example #24
def decision_tree_driver(args, evaluate=False):
    images, data_matrix = utils.get_all_vectors(args.decision_model)
    # Fetch unlabelled data (as provided in the settings)
    u_images, u_meta, unlabelled = helper.get_unlabelled_data(
        args.decision_model)

    #matrix, _, _, um = reducer(data_matrix, 30, "nmf", query_vector=unlabelled)
    matrix = data_matrix
    um = unlabelled

    l_matrix = matrix[:len(images)]
    u_matrix = um[:len(u_images)]

    dm = helper.build_labelled_matrix(l_matrix, images, 'aspectOfHand')

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    max_depth = args.decision_max_depth
    min_size = args.decision_min_size

    prediction = decision_tree(dm, query, max_depth, min_size)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0

    if evaluate:
        master_meta = utils.get_metadata(master_db=True)
        # Mapping between image file path name and the metadata
        master_meta = {m['imageName']: m for m in master_meta}
        truth = [
            dorsal_symbol
            if master_meta[Path(image).name]['aspectOfHand'].split(' ')[0]
            == 'dorsal' else palmar_symbol for image in u_images
        ]

        print(helper.get_accuracy(truth, prediction))

    return zip(u_images, prediction)
Code example #25
File: mnist.py Project: tianhaoz95/mangekyo
def train_mnist(project_id, epoch, train_per_epoch, interval):
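    # Train a GAN on MNIST: generator and discriminator each get their own Adam optimizer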
    check_gpu(logger)
    project_metadata = get_metadata(project_id)
    train(
        dataset=load_mnist_dataset(project_id=project_id,
                                   buffer_size=60000,
                                   batch_size=256),
        gen=build_generator_model(),
        dis=build_discriminator_model(),
        gen_opt=keras.optimizers.Adam(1e-4),
        dis_opt=keras.optimizers.Adam(1e-4),
        logger=logger,
        epochs=epoch,
        start_epoch=0,
        interval=interval,
        train_per_epoch=train_per_epoch,
        sample_size=4,
        batch_size=32,
        visualize=visualize_mnist_sample,
        project_metadata=project_metadata,
        gen_input_generator=MnistInputGenerator(feat_dim=100),
    )
Code example #26
def ppr_driver(args, evaluate=False):
    l_images, u_images, l_meta, u_meta, l_matrix, u_matrix = PreparePPRData.prepare_data(
        args.model, args.k_latent_semantics, args.frt, args.ignore_metadata)

    # Build training data
    labelled = helper.build_matrix_with_labels(l_matrix, l_images, l_meta)

    # prepare test data
    query = helper.prepare_matrix_for_evaluation(u_matrix)

    # Evaluate
    predictions = ppr_classifier(labelled,
                                 query,
                                 frt=args.frt,
                                 k=args.k_latent_semantics,
                                 feature=args.model,
                                 edges=args.graph_edges,
                                 alpha=args.alpha,
                                 convergence=args.convergence)

    dorsal_symbol = 0.0
    palmar_symbol = 1.0

    if evaluate:
        master_meta = utils.get_metadata(master_db=True)
        # Mapping between image file path name and the metadata
        master_meta = {m['imageName']: m for m in master_meta}
        truth = [
            dorsal_symbol
            if master_meta[Path(image).name]['aspectOfHand'].split(' ')[0]
            == 'dorsal' else palmar_symbol for image in u_images
        ]

        print(helper.get_accuracy(truth, predictions))

    # Visualization pending
    return zip(u_images, predictions)
Code example #27
        "HC03_VC13"
    ]
    factors = list(get_factors(sources, n_factors).values())
    means = []
    diffs = []
    for i, factor in enumerate(factors):
        values = np.array(list(factor.values()))
        # values = (values - np.min(values)) / (np.max(values) - np.min(values))
        means.append(np.mean(values))
        diffs.append(np.mean(values, 1))

    # diffs = (diffs - np.min(diffs)) / (np.max(diffs) - np.min(diffs))
    fig = plt.figure()
    plt.plot(sources, 10 * lambdas_diff, label="λ")
    for n_factor, diff in zip(n_factors, diffs):
        plt.plot(sources, diff, label=get_metadata(2010, n_factor, False))
    plt.legend(loc="upper left")
    plt.show()

    X = [np.array(list(xs)) for xs in zip(*diffs)]
    # X = np.array([np.array(diffs[0]) * np.array(diffs[1])]).reshape(-1, 1)
    y = lambdas_diff
    # plt.plot(sources, 3*y)
    # plt.plot(sources, X)
    # plt.show()

    reg = LinearRegression()
    reg.fit(X, y)
    print(reg.score(X, y))
    print(reg.coef_)
    print(reg.intercept_)
Code example #28
File: app.py Project: rizafahmi/hacktiv8-surl
def new():
    if request.method == 'POST':
        original_url = str(request.form.get('url'))
        pixel_script = str(request.form.get('pixel_script'))
        keyword = str(request.form.get('keyword'))

        try:
            metadata = utils.get_metadata(original_url)

            template_name = "redirection_debug.html"
            if DEBUG == True:
                template_name = "redirection_debug.html"
            else:
                template_name = "redirection.html"

            if "title" in metadata:
                metadata_title = metadata.title
            else:
                metadata_title = ""

            if "type" in metadata:
                metadata_type = metadata.type
            else:
                metadata_type = ""

            if "image" in metadata:
                metadata_image = metadata.image
            else:
                metadata_image = ""

            if "description" in metadata:
                metadata_description = metadata.description
            else:
                metadata_description = ""
        except Exception:
            # fall back to empty metadata if extraction fails
            metadata_title = ""
            metadata_type = ""
            metadata_image = ""
            metadata_description = ""

        html_file = render_template(template_name, url=original_url, title=metadata_title, type=metadata_type, image=metadata_image, description=metadata_description, pixel_script=pixel_script)

        filename = shortuuid.ShortUUID().random(length=6)
        filename = filename + ".html"

        directory = "r/" + keyword
        if not os.path.exists(directory):
            os.makedirs(directory)

        with open(directory + "/" + filename, mode="w", encoding="utf-8") as file:
            file.write(str(html_file))
            file.close()

        # write to csv
        fp = open("static/" + "data.csv", "a")
        try:
            writer = csv.writer(fp)
            writer.writerow((str(original_url), str(filename)))
        finally:
            fp.close()


        # return redirect(SHORT_SITE + "/static/" + filename )
        return render_template("new.html", redirect_url=SHORT_SITE + directory + "/" + filename)

    return render_template("new.html")
Code example #29
File: predict_many.py Project: hoangtnm/ai_training
        if prediction in fname:
            passed += 1
        else:
            failed += 1
    print('*' * 50)
    print(' > passed: ', passed)
    print(' > failed: ', failed)
    ar = passed / (passed + failed)
    print(' > accuracy ratio: ', '%.2f' % (ar * 100), '%')


def get_files(dname, fpath):
    dpath = os.path.join(fpath, dname)
    return [f'{dpath}/{fname}' for fname in os.listdir(dpath)]


if __name__ == '__main__':
    model_path = sys.argv[1]
    fpath = sys.argv[2]

    # Training dataset metadata
    _, class_names, class_to_idx = get_metadata(fpath)
    num_classes = len(class_names)
    idx_to_class = {value: key for key, value in class_to_idx.items()}

    flist = [get_files(cls, fpath) for cls in class_names]
    files = list(reduce(lambda x, y: x + y, flist))

    result = predict_all(files, idx_to_class, model_path)
    analyze(result)
Code example #30
    # y = [ class binary vars | subclass binary vars | pct] 
    for item in metadata:
        y[c, class_map[item['class']]] = 1
        y[c, subclass_map[item['subclass']] + len(total_classes) ] = 1
        #TODO fix the filenames or write a script to handle vol frac
#        print item['volume_frac']
#        y[c, len(total_classes) + len(total_subclasses)] = item['volume_frac']
        c += 1 
    return y

if __name__ == '__main__':
    num_grain_comp = 15
    num_chord_comp = 3
    num_folds = 5

    metadata, class_map, subclass_map = get_metadata(paths.stats_files())
    if os.path.isfile(paths.stats_pca_path()+'grain_grain_pca_scores.npy'):
        print 'PCA .npy found, loading.'
        pca_scores = np.load(paths.stats_pca_path()+'grain_grain_pca_scores.npy')
    else:
        x = load_data(paths.stats_files())
        pca_scores = get_pca(x)
        np.save(paths.stats_pca_path()+'grain_grain_pca_scores.npy', pca_scores)
    chords = load_chords(paths.cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True) 
    input_params = flatten_input_params(metadata, class_map, subclass_map)    
      

    # PLOTTING FCNS
#    plot_chords(chords[0:5,0])
#    class_labels, class_data = group_components_by_class(metadata, chords_pca)
Code example #31
logging.basicConfig(level=logging.DEBUG)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("input", nargs=1, help="The training hdf5 file")
    parser.add_argument("-pre", dest="pre", help="Pretrained word embeddings file in word2vec format (word <space> embedding, one per line)")
    options = parser.parse_args()

    # Load config parameters
    locals().update(config)
    logging.debug('loaded config')

    # DATA
    hdf5_file = options.input[0]
    word_to_ix, ix_to_word, morpho_to_ix, ix_to_morpho = get_metadata(hdf5_file)
    vocab_size = len(word_to_ix)
    morpho_vocab_size = len(morpho_to_ix)
    train_stream = get_stream(hdf5_file, 'train', batch_size)
    dev_stream = get_stream(hdf5_file, 'dev', batch_size)
    logging.debug('loaded data')
    print "Number of words:", vocab_size
    print "Number of morphemes:", morpho_vocab_size
    # Save the word and morpheme indices to disk
    D = { }
    D["word_to_ix"] = word_to_ix
    D["morpho_to_ix"] = morpho_to_ix
    cPickle.dump(D, open("dicts.pkl", "w"))
    logging.debug('wrote dicts')
    # Load the pretrained vectors if available
    if options.pre is not None:
Code example #32
File: get_qs_graph.py Project: ForsakeLJK/GSAKT
import numpy as np
import pandas as pd
from utils import get_dataframe, NumpyEncoder, get_metadata
import json

train_dir = "assist09_train.csv"
test_dir = "assist09_test.csv"
skill_matrix_dir = "assist09_skill_matrix.txt"

df_train = get_dataframe(train_dir)
df_test = get_dataframe(test_dir)
# use this to extract the whole q-s graph
df_total = pd.concat([df_train, df_test], ignore_index=True)
skill_matrix = np.loadtxt(skill_matrix_dir)

single_skill_cnt, skill_cnt, max_idx = get_metadata(skill_matrix, df_total)

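# index layout: single skills, then multi-skill combos, then questions, then two correctness tokens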
print("single skill: 0 ~ {}, multi-skill: {} ~ {}, question: {} ~ {}, correctness: {} and {}"\
      .format(single_skill_cnt - 1, single_skill_cnt, skill_cnt - 1, skill_cnt, max_idx - 2, max_idx - 1, max_idx))

# graph -> list of dict
# node: {"type": "skill" or "question", "neighbor": [indices]}
qs_graph = []
# ?: is it feasible to get rid of multi-skills?
# init graph
node_cnt = single_skill_cnt + max_idx - 2 - skill_cnt + 1
for i in range(node_cnt):
    if i >= 0 and i < single_skill_cnt:
        qs_graph.append({"type": "skill", "neighbor": []})
    else:
        qs_graph.append({"type": "question", "neighbor": []})
Code example #33
        AdaBoostClassifier(),
        GaussianNB(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis() ]

    for name, clf in zip(names, classifiers):
        scores = cross_val_score(clf, x, y, cv=num_folds)
        print name + ': ' + str(np.mean(scores))


if __name__ == '__main__':
    num_grain_comp = 5
    num_chord_comp = 5
    num_folds = 5

    metadata, class_map, subclass_map = get_metadata(stats_files())
    if os.path.isfile(stats_pca_path()+'grain_grain_pca_scores.npy'):
        print 'PCA .npy found, loading.'
        grain_pca = np.load(stats_pca_path()+'grain_grain_pca_scores.npy')
    else:
        x = load_data(stats_files())
        grain_pca = get_pca(x)
        np.save(stats_pca_path()+'grain_grain_pca_scores.npy', grain_pca)

    chords = load_chords(cord_length_path())
    chords_pca = get_chords_pca(chords, use_avg=True) 
    
    # show the pca plots
    #plt.scatter(grain_pca[:, 0], grain_pca[:,1], alpha=0.85)
    #plt.show()
Code example #34
File: discriminate.py Project: Keesiu/meta-kaggle
def main(settingsfname, verbose=False):

    settings = utils.get_settings(settingsfname)

    subjects = settings['SUBJECTS']

    data = utils.get_data(settings, verbose=verbose)

    metadata = utils.get_metadata()

    features_that_parsed = [
        feature for feature in settings['FEATURES']
        if feature in list(data.keys())
    ]

    settings['FEATURES'] = features_that_parsed

    utils.print_verbose("=====Feature HDF5s parsed=====", flag=verbose)

    # get model
    model_pipe = utils.build_model_pipe(settings)

    utils.print_verbose("=== Model Used ===\n"
                        "{0}\n==================".format(model_pipe),
                        flag=verbose)

    # dictionary to store results
    subject_predictions = {}

    accuracy_scores = {}

    for subject in subjects:
        utils.print_verbose("=====Training {0} Model=====".format(
            str(subject)),
                            flag=verbose)

        # initialise the data assembler
        assembler = utils.DataAssembler(settings, data, metadata)
        X, y = assembler.test_train_discrimination(subject)

        # get the CV iterator
        cv = utils.sklearn.cross_validation.StratifiedShuffleSplit(
            y, random_state=settings['R_SEED'], n_iter=settings['CVITERCOUNT'])

        # initialise lists for cross-val results
        predictions = []
        labels = []
        allweights = []

        # run cross validation and report results
        for train, test in cv:

            # calculate the weights
            weights = utils.get_weights(y[train])
            # fit the model to the training data
            model_pipe.fit(X[train], y[train], clf__sample_weight=weights)
            # append new predictions
            predictions.append(model_pipe.predict(X[test]))
            # append test weights to store (why?) (used to calculate auc below)
            weights = utils.get_weights(y[test])
            allweights.append(weights)
            # store true labels
            labels.append(y[test])

        # stack up the results
        predictions = utils.np.hstack(predictions)
        labels = utils.np.hstack(labels)
        weights = utils.np.hstack(allweights)

        # calculate the total accuracy
        accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                        predictions,
                                                        sample_weight=weights)

        print("Accuracy score for {1}: {0:.3f}".format(accuracy, subject))

        # add AUC scores to a subj dict
        accuracy_scores.update({subject: accuracy})

        # store results from each subject
        subject_predictions[subject] = (predictions, labels, weights)

    # stack subject results (don't worry about this line)
    predictions, labels, weights = map(
        utils.np.hstack, zip(*list(subject_predictions.values())))

    # calculate global accuracy
    accuracy = utils.sklearn.metrics.accuracy_score(labels,
                                                    predictions,
                                                    sample_weight=weights)

    print(
        "predicted accuracy score over all subjects: {0:.2f}".format(accuracy))

    # output AUC scores to file
    accuracy_scores.update({'all': accuracy})

    settings['DISCRIMINATE'] = 'accuracy_scores.csv'
    # settings['AUC_SCORE_PATH'] = 'discriminate_scores'
    utils.output_auc_scores(accuracy_scores, settings)

    return accuracy_scores
Code example #35
File: narps.py Project: felixholzmeister/narps
    def convert_to_zscores(self, map_metadata_file=None, overwrite=None):
        """
        convert rectified images to z scores
        - unthresholded images could be either t or z images
        - if they are already z then just copy
        - use metadata supplied by teams to determine image type
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        if map_metadata_file is None:
            map_metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'narps_neurovault_images_details.csv')
        unthresh_stat_type = get_map_metadata(map_metadata_file)
        metadata = get_metadata(self.metadata_file)

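        # attach each team's participant count to its unthresholded map type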
        n_participants = metadata[['n_participants', 'NV_collection_string']]

        n_participants.index = metadata.teamID

        unthresh_stat_type = unthresh_stat_type.merge(n_participants,
                                                      left_index=True,
                                                      right_index=True)

        for teamID in self.complete_image_sets:
            if teamID not in unthresh_stat_type.index:
                print('no map metadata for', teamID)
                continue
            # this is a bit of a kludge
            # since some contrasts include all subjects
            # but others only include some
            # we don't have the number of participants in each
            # group so we just use the entire number
            n = unthresh_stat_type.loc[teamID, 'n_participants']

            for hyp in range(1, 10):
                infile = self.teams[teamID].images['unthresh']['rectified'][
                    hyp]
                if not os.path.exists(infile):
                    print('skipping', infile)
                    continue
                self.teams[teamID].images['unthresh']['zstat'][
                    hyp] = os.path.join(self.dirs.dirs['zstat'],
                                        self.teams[teamID].datadir_label,
                                        'hypo%d_unthresh.nii.gz' % hyp)
                if not overwrite and os.path.exists(
                        self.teams[teamID].images['unthresh']['zstat'][hyp]):
                    continue

                if unthresh_stat_type.loc[teamID,
                                          'unthresh_type'].lower() == 't':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    print("converting %s (hyp %d) to z - %d participants" %
                          (teamID, hyp, n))
                    TtoZ(infile,
                         self.teams[teamID].images['unthresh']['zstat'][hyp],
                         n - 1)
                elif unthresh_stat_type.loc[teamID, 'unthresh_type'] == 'z':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    if not os.path.exists(self.teams[teamID].images['unthresh']
                                          ['zstat'][hyp]):
                        print('copying', teamID)
                        shutil.copy(
                            infile,
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                else:
                    # if it's not T or Z then we skip it as it's not usable
                    print('skipping %s - other data type' % teamID)
Code example #36
File: train.py Project: jqlts1/blocks-char-rnn
from blocks.graph import ComputationGraph, apply_dropout
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing, saveload
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
locals().update(config)

# DATA
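# character/index mappings and vocabulary size come from the HDF5 metadata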
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
Code example #37
File: app.py Project: judsonsam/tekautoday
def record(record_hash):
    record = records.get_record_by_hash(record_hash)
    metadata = utils.get_metadata(record)
    context = utils.format_response(record, metadata)

    return flask.render_template('index.html', **context)
Code example #38
def main():
    metadata = utils.get_metadata()
    settings = utils.get_settings('probablygood.gavin.json')
    settings['R_SEED'] = None
    # settings['SUBJECTS'] = ['Patient_2']
    scaler = sklearn.preprocessing.StandardScaler()
    thresh = sklearn.feature_selection.VarianceThreshold()
    # selector = sklearn.feature_selection.SelectKBest()
    classifier = sklearn.svm.SVC(probability=True)
    pipe = sklearn.pipeline.Pipeline([('scl', scaler),
                                      ('thr', thresh),
                                      # ('sel', selector),
                                      ('cls', classifier)])

    output = {}

    data = utils.get_data(settings)
    da = utils.DataAssembler(settings, data, metadata)
    global_results = {}
    for subject in list(settings['SUBJECTS']) + ['global']:
        global_results[subject] = {}

    for i in range(10):
        print("iteration {0}".format(i))

        for subject in settings['SUBJECTS']:
            print(subject)
            X, y = da.build_training(subject)
            # cv = utils.Sequence_CV(da.training_segments, metadata)
            train, test, train_results, test_results = \
                fit_and_return_parts_and_results(da, metadata, pipe, X, y)
            output.update({subject: {'train': train,
                                     'test': test,
                                     'train_results': train_results,
                                     'test_results': test_results}})

    #    with open('raw_cv_data.pickle', 'wb') as fh:
    #        pickle.dump(output, fh)

        summary_stats = mean_var_calc(output)

        for subject in settings['SUBJECTS']:
            for t in summary_stats[subject]:
                try:
                    global_results[subject][t] += [summary_stats[subject][t]]
                except KeyError:
                    global_results[subject][t] = [summary_stats[subject][t]]
    print(global_results)
    for subject in settings['SUBJECTS']:
        for t in global_results[subject]:
            meanscore = np.mean(global_results[subject][t])
            varscore = np.var(global_results[subject][t])
            print("For {0} mean {1} was "
                  "{2} with sigma {3}".format(subject, t, meanscore, varscore))

    with open('summary_stats.pickle', 'wb') as fh:
        pickle.dump(global_results, fh)
Code example #39
File: predict.py Project: hoangtnm/ai_training
import os
import sys

import torch
from PIL import Image

from utils import get_device
from utils import get_metadata
from utils import get_net
from utils import get_prediction_class
from utils import preprocess_image

if __name__ == '__main__':
    device = get_device()

    # Training dataset metadata
    _, class_names, class_to_idx = get_metadata(sys.argv[1])
    num_classes = len(class_names)
    idx_to_class = {value: key for key, value in class_to_idx.items()}

    # Data preparation
    image = Image.open(sys.argv[2])

    # Net initialization
    net = get_net(classes=num_classes)
    checkpoint_dict = torch.load(os.path.join('checkpoint', 'checkpoint.pth'),
                                 map_location=device)
    net.load_state_dict(checkpoint_dict['model_state_dict'])
    net.eval()
    net.to(device)

    # Prediction
Code example #40
    "alpha": 0.5,
    "lambda": 400,
    "subsample": 0.7,
    "colsample_bytree": 0.3,
    "objective": "binary:logistic",
    "scale_pos_weight": 0.9,
    "seed": 16,
    "gpu_id": 0,
    "tree_method": "gpu_hist",
}

for artificial in [False, True]:
    fig, axes = plt.subplots(1,
                             4,
                             figsize=(20, 15 / (1 + 5 * int(artificial))),
                             sharex=True)

    feat_imp = dict()
    X, y = utils.get_metadata(500, artificial=artificial)

    y = y > 0

    for i in np.arange(4):
        model = xgboost.XGBClassifier(**params).fit(X, y.iloc[:, i])
        imp = pd.Series(
            model.get_booster().get_fscore()).sort_values(ascending=True)
        imp.plot(kind="barh", ax=axes[i])

    fig.tight_layout()
    plt.show()
Code example #41
File: app.py Project: CoserU/Explainable-RecSys
def rating_review():
    """
    Predict personalized review-usefulness

    Outputs:
    -------
    : predicted ratings, inference time, top reviews with ratings, other reviews with ratings, item metadata
    """
    if request.method == 'POST':
        ids = request.json
        user_id = int(ids['uid'])
        item_id = int(ids['iid'])

        # Feed the inputs to the Tensorflow Serving model
        res, time_dif = tf_serving([u_text[user_id].tolist()],
                                   [i_text[item_id].tolist()],
                                   np.array([[user_id]]),
                                   np.array([[item_id]]))

        # Get the rating and ordered reviews based on their review-usefulness
        rating = np.array(res['final_rating/add_1:0']).reshape(-1)
        item_rev_weights = np.array(
            res['item_rev_weights/transpose_1:0']).reshape(-1)

        order = np.argsort(item_rev_weights)[::-1]
        rev_texts = item_rev_original[item_id][:review_num_i]
        if len(rev_texts) < review_num_i:
            rev_texts = rev_texts + [''] * (review_num_i - len(rev_texts))
        rev_texts = np.array(rev_texts)[order]

        # Top-3 reviews and other reviews
        toprevs = []
        otherrevs = []

        for i, rev_text in enumerate(rev_texts):
            if rev_text:
                if i < 3 or len(toprevs) < 3:
                    toprevs.append(rev_text)
                else:
                    otherrevs.append(rev_text)

        rev_rate_top = [int(float(df_revrate[toprev])) for toprev in toprevs]
        rev_rate_other = [
            int(float(df_revrate[otherrev])) for otherrev in otherrevs
        ]

        # Prepare the metadata for the item
        des_meta, title_meta, price_meta, imurl_meta, categ_meta = get_metadata(
            df_meta, item_id, single_pred=True)

        return json.dumps({
            'rating': rating.tolist(),
            'infertime': time_dif.total_seconds(),
            'toprevs': toprevs,
            'otherrevs': otherrevs,
            'rev_rate_top': rev_rate_top,
            'rev_rate_other': rev_rate_other,
            'des_meta': des_meta,
            'title_meta': title_meta,
            'price_meta': price_meta,
            'imurl_meta': imurl_meta,
            'categ_meta': categ_meta
        })

    else:
        return render_template('candidate.html')
Code example #42
def info_metadata():
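    # current track metadata plus the current playback volume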
    res = get_metadata()
    res['volume'] = lib.SpPlaybackGetVolume()
    return jsonify(res)
Code example #43
File: train.py Project: ixtel/blocks-char-rnn
from blocks.algorithms import StepClipping, GradientDescent, CompositeRule, RMSProp
from blocks.filter import VariableFilter
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.training import SharedVariableModifier
from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
from blocks.monitoring import aggregation
from blocks.extensions import saveload
from utils import get_metadata, get_stream, track_best, MainLoop
from model import nn_fprop
from config import config

# Load config parameters
locals().update(config)

# DATA
ix_to_char, char_to_ix, vocab_size = get_metadata(hdf5_file)
train_stream = get_stream(hdf5_file, 'train', batch_size)
dev_stream = get_stream(hdf5_file, 'dev', batch_size)


# MODEL
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)

# COST
cg = ComputationGraph(cost)

if dropout > 0:
    # Apply dropout only to the non-recurrent inputs (Zaremba et al. 2015)
    inputs = VariableFilter(theano_name_regex=r'.*apply_input.*')(cg.variables)
Code example #44
File: app.py Project: judsonsam/tekautoday
def api_record(record_hash):
    metadata = utils.get_metadata(records.get_record_by_hash(record_hash))

    return flask.jsonify(**metadata)