def pseudo_cluster_tt(self, cluster, type, name):
        """Build pseudo-user train/test CSV files from a movie clustering.

        Each real user is assigned a "favourite" cluster (the cluster with the
        highest mean rating value for that user).  One pseudo user is then
        created per cluster whose rating of a movie is the mean rating given
        by the users favouring that cluster.  The combined real+pseudo train
        set, a stub test set, cluster labels, the cluster assignment and the
        pseudo-user ratings are all written next to ``self.train_file`` /
        ``self.test_file`` via ``utils.get_output_name``.

        Parameters
        ----------
        cluster : DataFrame with at least ``movieId`` and ``cluster`` columns.
        type : str
            ``'bias'`` uses ``self.rating_dense_bias``, ``'original'`` uses
            ``self.rating_dense``; anything else raises ``ValueError``.
            (The name shadows the builtin ``type`` but is kept so existing
            keyword callers keep working.)
        name : str
            Suffix used when deriving the output file names.
        """
        if type == 'bias':
            rating_cluster = pd.merge(self.rating_dense_bias, cluster, on='movieId')
        elif type == 'original':
            rating_cluster = pd.merge(self.rating_dense, cluster, on='movieId')
        else:
            # Fixed typo in the message ('Unrecoginzed') for consistency with
            # the other validation errors in this file.
            raise ValueError('Unrecognized type')
        # Mean rating value per (user, cluster).  'mean' replaces np.mean:
        # passing the numpy callable to .agg is deprecated in recent pandas.
        rating_cluster = (rating_cluster.groupby(['userId', 'cluster'])['value']
                          .agg('mean').reset_index())
        # BUG FIX: the original used x['value'].argmax() together with .loc.
        # In modern pandas Series.argmax() returns a *positional* index while
        # .loc expects a *label*; on the non-contiguous index of each group
        # this selects the wrong row or raises KeyError.  idxmax() returns
        # the label and is the correct pairing with .loc.
        user_fav_cluster = (rating_cluster.groupby('userId')
                            .apply(lambda x: x.loc[x['value'].idxmax()])
                            .reset_index(drop=True))
        # Pseudo rating: mean real rating per (favourite cluster, movie).
        tmp = pd.merge(self.rating_dense, user_fav_cluster, on='userId')
        pseudo_rating = tmp.groupby(['cluster', 'movieId'])['rating'].agg('mean').reset_index()
        pseudo_rating['userId'] = utils.fake_uid(pseudo_rating.cluster)

        # Training file = real training ratings + pseudo-user ratings.
        train_ratings = pd.concat([self.ratings_for_train[['userId', 'movieId', 'rating']],
                                   pseudo_rating[['userId', 'movieId', 'rating']]],
                                  ignore_index=True)
        train_ratings.to_csv(utils.get_output_name(self.train_file, type+'_'+name),
                             header=False, index=False)
        # Stub test set: one placeholder row (movieId=1, rating=2) per pseudo
        # user, presumably just to make downstream tooling accept the file.
        test_ratings = pd.DataFrame({'userId': np.unique(pseudo_rating.userId), 'movieId': 1, 'rating': 2})
        test_ratings[['userId', 'movieId', 'rating']]\
            .to_csv(utils.get_output_name(self.test_file, type+'_'+name), header=False, index=False)
        labels = self.label_clusters(cluster)
        labels.to_csv(utils.get_output_name(self.train_file, type+'_'+name+'_label'),
                      index=False)
        cluster.to_csv(utils.get_output_name(self.train_file, type+'_'+name+'_cluster'),
                       index=False)
        pseudo_rating[['userId', 'movieId', 'rating']].to_csv(
            utils.get_output_name(self.train_file, type+'_'+name+'_user'), index=False)
Beispiel #2
0
 def write_train_test_movielens(self, train_fn, test_fn, cost_fn, movie_fn,
                                train_this_fold, train_other_folds, test, n):
     """Write train/test CSVs for one MovieLens fold.

     Movies are ranked on the other folds; within this fold each user keeps
     only their top-n rows by that rank (via ``self.top_n``).  The surviving
     rows are combined with the other folds to form the final training file.
     """
     ranked_movies = self.rank_movie(train_other_folds)
     rank_table = pd.DataFrame({'item': ranked_movies,
                                'rank': range(1, len(ranked_movies) + 1)})
     fold_with_rank = pd.merge(train_this_fold, rank_table, on='item')
     kept = (fold_with_rank.groupby('user')
             .apply(self.top_n, n, 'rank')
             .reset_index(drop=True))
     combined = pd.concat([train_other_folds,
                           kept[['user', 'item', 'rating', 'time']]])
     # Persist: ranked movie list, per-user selection (with rank column),
     # final training set, and the untouched test set.
     pd.DataFrame({'movie': ranked_movies}).to_csv(
         utils.get_output_name(train_fn, str(n) + '_' + movie_fn))
     kept.to_csv(utils.get_output_name(train_fn, str(n) + '_' + cost_fn), index=False)
     combined.to_csv(utils.get_output_name(train_fn, str(n)), header=False, index=False)
     test.to_csv(utils.get_output_name(test_fn, str(n)), header=False, index=False)
Beispiel #3
0
 def write_train_test(self, train_fn, test_fn, select_fn, movie_fn,
                      train_this_fold, train_other_folds, test, n):
     """Write train/test CSVs for one fold, keeping only listed movies.

     Generate a movie list (size controlled by ``n``) from the other folds,
     keep only this fold's rows whose item is in that list, and combine them
     with the other folds to make the final training file.
     """
     movie_list = self.gen_movie_list(train_other_folds, n)
     # NOTE: this mutates the caller's frame.  The extra 'rated' column is
     # intentionally part of the *_select_fn CSV written below, so it is kept.
     train_this_fold['rated'] = train_this_fold.item.isin(movie_list)
     # FIX: drop('rated', 1) passed `axis` positionally, which has been
     # keyword-only since pandas 2.0 (TypeError).  columns= is equivalent
     # and works on all supported pandas versions.
     train_selected = train_this_fold[train_this_fold.rated].drop(columns='rated')
     train_final = pd.concat([train_other_folds, train_selected])
     # output to files
     pd.DataFrame({'movie': movie_list}).to_csv(utils.get_output_name(train_fn, str(n)+'_'+movie_fn))
     train_this_fold.to_csv(utils.get_output_name(train_fn, str(n)+'_'+select_fn), index=False)
     train_final.to_csv(utils.get_output_name(train_fn, str(n)), header=False, index=False)
     test.to_csv(utils.get_output_name(test_fn, str(n)), header=False, index=False)
Beispiel #4
0
def html_to_pdf(file_name, output_name=None):
    """Render an HTML file to PDF with pdfkit and return the output path."""
    # A falsy output_name (None or '') means: derive the name from the input.
    if not output_name:
        output_name = get_output_name(file_name, 'pdf')
    # Only build an explicit pdfkit configuration when a wkhtmltopdf path is
    # configured; otherwise let pdfkit find the binary itself.
    wk_path = Config.get('WKH2P_PATH', '')
    if wk_path:
        configuration = pdfkit.configuration(wkhtmltopdf=wk_path)
    else:
        configuration = None
    pdfkit.from_file(file_name, output_name,
                     configuration=configuration,
                     options=Config["WKH2P_OPTION"])
    return output_name
Beispiel #5
0
def md_to_html(file_name, output_name=None):
    """Convert a Markdown file to a full HTML page on disk; return its path."""
    body = convert_md_html(file_name)
    page = fill_html(body)

    # A falsy output_name (None or '') means: derive the name from the input.
    if not output_name:
        output_name = get_output_name(file_name, 'html')

    with open(output_name, 'w', encoding='utf-8') as out:
        out.write(page)

    return output_name
def main():
    """CLI entry point: train a cluster-backed recommender and score items.

    Reads train/test/model file locations, the clustering algorithm name,
    the prediction algorithm list, a partition number and a score type from
    the command line, then trains a ClusterRecommender and scores the test
    items.

    Raises
    ------
    ValueError
        If ``score_type`` is neither 'optimal' nor 'simulation'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('train_file', type=str, help='Location of training files')
    parser.add_argument('test_file', type=str, help='Location of testing files')
    parser.add_argument('model_file', type=str, help='Location of the model recommendation file')
    parser.add_argument('cluster_name', type=str,
                        help='Name of the clustering algorithm backing the recommendation')
    parser.add_argument('prediction', type=str,
                        help='The list of prediction algorithm names')
    parser.add_argument('partition', type=int,
                        help='The number of the file partition')
    parser.add_argument('score_type', type=str, help='Type of scoring process')
    args = parser.parse_args()

    # IMPROVEMENT: validate score_type *before* the expensive training step,
    # so a typo fails immediately instead of after training completes.
    if args.score_type not in ['optimal', 'simulation']:
        raise ValueError('Unrecognized input for score_type')

    recommender = ClusterRecommender()
    cluster_file = utils.get_output_name(args.train_file, args.cluster_name+'_cluster')
    recommender.train(args.cluster_name, args.prediction, args.model_file, cluster_file, args.partition)
    recommender.score_item(args.train_file, args.test_file, args.score_type)
Beispiel #7
0
						type = lambda arg: is_valid_file(parser, arg), 
						help = "The path to e2p2 program")
	# Output option: the bracketed default acts as a sentinel that is swapped
	# for a real name (derived from the input fasta) after parsing, below.
	parser.add_argument("-o", "--out",
		                dest = "output",
		                type = str,
		                default = "[Input fasta filename without its extension].pf",
		                help = "The output file name")  
	args = parser.parse_args()
	# Both the fasta file and the E2P2 path are effectively required.
	if args.file is None or args.path is None: 
			parser.print_help()
			exit(0)
	#
	# Retrieve fasta file directory path
	fasta_path = os.path.dirname(os.path.abspath(args.file))
	test_fasta = os.path.join(fasta_path, ".e2p2_test_file.fasta")
	
	# Restructure the fasta file to be good for E2P2 program
	restructure_fasta(args.file, test_fasta)
	
	# Running the E2P2 program
	default_output = "[Input fasta filename without its extension].pf"
	if args.output == default_output: args.output = get_output_name(args.file) + ".pf"
	command = ["python", args.path, "-i", test_fasta, "-o", args.output]
	# NOTE(review): the '/dev/null' handle is opened but never closed, and the
	# path is POSIX-only — presumably acceptable for this cluster environment.
	subprocess.call(command, stdout = open('/dev/null', 'w'), stderr = subprocess.STDOUT)
	
	# Deleting the temporary and intermediate files
	e2p2_path = os.path.dirname(os.path.abspath(args.path))
	if os.path.exists(test_fasta): os.remove(test_fasta)
	if os.path.exists(os.path.join(e2p2_path, "run")): 
		shutil.rmtree(os.path.join(e2p2_path, "run"))	
Beispiel #8
0
                        help="The path to interproscan program")
    # Output option: the bracketed default acts as a sentinel that is swapped
    # for a real name (derived from the input file) after parsing, below.
    parser.add_argument(
        "-o",
        "--out",
        dest="output",
        type=str,
        default="[Input fasta filename without its extension].tsv",
        help="The output file name")

    args = parser.parse_args()
    # Both the input file and the interproscan path are effectively required.
    if args.file is None or args.path is None:
        parser.print_help()
        exit(0)
    #
    # Running the interproscan program
    default_output = "[Input fasta filename without its extension].tsv"
    if args.output == default_output:
        args.output = get_output_name(args.file) + ".tsv"
    #if os.path.exists(os.path.abspath("runIprScan.sh")): os.remove(os.path.abspath("runIprScan.sh"))
    # Record time.
    # The timestamp makes the generated wrapper script name unique per run.
    time_stamp = str(time.time())
    script = open("runIprScan." + time_stamp + ".sh", "w")
    # NOTE(review): args.path/args.file/args.output are interpolated into a
    # shell command unquoted — paths containing spaces or shell metacharacters
    # will break or be interpreted by the shell.
    command = args.path + " -i " + args.file + " -f TSV -pa -o " + args.output
    command += " -iprlookup -goterms -dp -crid UNIDAUPIF -cpu 30\n"
    script.write(command)
    script.close()
    # Python 2 only: print statements plus the removed `commands` module.
    # The generated script is made executable and submitted to SGE via qsub.
    print commands.getoutput("chmod +x runIprScan." + time_stamp + ".sh")
    print commands.getoutput(
        "qsub -q all.q@@bigmem -shell yes -S /bin/bash -cwd runIprScan." +
        time_stamp + ".sh")
Beispiel #9
0
        """Fast version of the main script that compute the R2 maps for a given subject and model, without nested cross-validation."""
    )
    parser.add_argument(
        "--yaml_file",
        type=str,
        default=
        "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/template.yml",
        help=
        "Path to the yaml containing the parameters of the script execution.")

    args = parser.parse_args()
    # All run configuration comes from a single YAML file.
    parameters = read_yaml(args.yaml_file)
    input_path = parameters['input']
    output_path_ = parameters['output']
    subject = get_subject_name(parameters['subject'])
    # Derive the per-language/subject/model output prefix and a log file
    # alongside it.
    output_path = get_output_name(output_path_, parameters['language'],
                                  subject, parameters['model_name'])
    logs = Logger(
        get_output_name(output_path_, parameters['language'], subject,
                        parameters['model_name'], 'logs.txt'))
    # Snapshot the parameters used for this run, for reproducibility.
    save_yaml(parameters, output_path + 'config.yml')

    logs.info("Fetching maskers...", end='\n')
    # Masker settings forwarded verbatim from the YAML.  NOTE(review): this
    # excerpt is truncated — the dict (and the function) continue past it.
    kwargs = {
        'detrend': parameters['detrend'],
        'standardize': parameters['standardize'],
        'high_pass': parameters['high_pass'],
        'low_pass': parameters['low_pass'],
        'mask_strategy': parameters['mask_strategy'],
        #'dtype': parameters['dtype'],
        'memory_level': parameters['memory_level'],
        'smoothing_fwhm': parameters['smoothing_fwhm'],
Beispiel #10
0
        description=
        """Main script that compute the R2 maps for a given component by SRM and model."""
    )
    parser.add_argument(
        "--yaml_file",
        type=str,
        default=
        "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/code/fMRI/template.yml",
        help=
        "Path to the yaml containing the parameters of the script execution.")

    args = parser.parse_args()
    # All run configuration comes from a single YAML file.
    parameters = read_yaml(args.yaml_file)
    input_path = parameters['input']
    output_path_ = parameters['output']
    # Same layout as the per-subject variant, but with the literal 'srm'
    # component in place of a subject name.
    output_path = get_output_name(output_path_, parameters['language'], 'srm',
                                  parameters['model_name'])
    logs = Logger(
        get_output_name(output_path_, parameters['language'], 'srm',
                        parameters['model_name'], 'logs.txt'))
    output_folder = os.path.join(output_path_, parameters['language'], 'srm',
                                 parameters['model_name'])
    check_folder(output_folder)
    # Snapshot the parameters used for this run, for reproducibility.
    save_yaml(parameters, output_path + 'config.yml')

    logs.info("Retrieve arguments for each model...")
    # Collect the keyword-argument bundles for each pipeline stage from the
    # YAML parameters.  NOTE(review): this excerpt is truncated — the function
    # continues past this point.
    kwargs_splitter = get_splitter_information(parameters)
    kwargs_compression = get_compression_information(parameters)
    kwargs_transformation = get_data_transformation_information(parameters)
    kwargs_estimator_model = get_estimator_model_information(parameters)
    logs.validate()