def copy(self, ids):
    file_utils.make_directory(self.reports_directory)
    # Work with a concrete list of IDs (dict.keys() is a view on Python 3).
    ids_array = list(ids.keys())
    # ids_array = [ids_array[0]]  # debug: limit the run to a single document
    self.data_utils.batch_fetch_docs_for_ids(base_url=self.src_server,
                                             ids=ids_array,
                                             index=self.src_index,
                                             type=self.src_type,
                                             docs_fetched=self.docs_fetched,
                                             batch_size=500)
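
All of these examples lean on the same file_utils.make_directory helper, whose implementation is not shown on this page. A minimal sketch of what such a helper typically looks like, assuming it only needs to create a directory tree idempotently:

import os

def make_directory(path):
    # Create the directory and any missing parents; do nothing if it
    # already exists.
    os.makedirs(path, exist_ok=True)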
Example 2
def get_doc_ids(server, src_index, src_type, dest_dir, dest_file_name, query=None):
    # Reuse a previously exported ID list if one is cached on disk.
    documents_ids = file_utils.load_file(dest_dir, dest_file_name)

    if len(documents_ids) == 0:
        documents_ids = export_doc_ids(server, src_index, src_type, query)

        print(__name__, 'Saving to', dest_dir, dest_file_name)
        file_utils.make_directory(dest_dir)
        file_utils.save_file(dest_dir, dest_file_name, documents_ids)

    return documents_ids
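
On the first run the IDs are exported from the server and cached; later runs read the cached file instead. A hypothetical invocation (the server URL, index, type, and file names below are placeholders, not from the source):

doc_ids = get_doc_ids('http://localhost:9200', 'articles', 'article',
                      'exports', 'article_ids.json')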
Example 3
    def dump_to_file(self):
        # Each data source gets its own subdirectory under the log root.
        logs_directory = self.log_files_directory + '/' + self.data_source_name
        file_utils.make_directory(logs_directory)
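
The example joins path segments by hand with '/'. A sketch of the same idea using os.path.join, which handles separators portably (the function name here is illustrative, not from the source):

import os

def log_directory_for(log_root, data_source_name):
    # Build the per-source log directory path portably.
    return os.path.join(log_root, data_source_name)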
Example 4
    region = system_variables[8]

    # sys.platform is 'linux2' on Python 2 and 'linux' on Python 3,
    # so check the prefix to detect the EC2 (Linux) host.
    if sys.platform.startswith('linux'):
        # set constant variables ec2
        output_dir = '/home/ec2-user/output/'
        input_dir = '/home/ec2-user/input/'
        processed_dir = '/home/ec2-user/processed/'
    else:
        # set constant variables local
        output_dir = 'processing/output/'
        input_dir = 'processing/input/'
        processed_dir = 'processing/processed/'

    # make the directories
    file_utils.make_directory(output_dir)
    file_utils.make_directory(input_dir)
    file_utils.make_directory(processed_dir)

    # create the s3 utility for the peon to use
    s3_util = s3_utility.S3Utility(access_key, secret_key)

    # load system parameters set by master node into local variables to use in init of peon
    peon = Peon(
        instance_id,
        s3_util,
        input_s3_bucket,
        output_s3_bucket,
        processed_s3_bucket,
        data_group_id,
        processing_strategy,
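
A self-contained sketch of the platform-dependent directory selection used above (the paths come from the example; the helper name is illustrative):

import sys

def choose_dirs():
    # sys.platform is 'linux2' on Python 2 and 'linux' on Python 3;
    # a prefix check covers both when detecting the EC2 host.
    if sys.platform.startswith('linux'):
        base = '/home/ec2-user/'
    else:
        base = 'processing/'
    return base + 'output/', base + 'input/', base + 'processed/'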
Example 5
    filename = "%s/word-assignments.dat" % directory
    w_assign_file = open(filename, "w")

    for doc_index in range(corpus.num_docs):

        # Show user script is still processing
        if (doc_index % 100) == 0:
            print("final e step document %d" % doc_index)

        # Run variational inference on the document, accumulate the corpus
        # likelihood, then record the per-word topic assignments.
        likelihood += util_functions.lda_inference(corpus.doc_list[doc_index],
                                                   model, var_gamma[doc_index], phi)
        file_utils.write_word_assignment(w_assign_file, corpus.doc_list[doc_index],
                                         phi, model)

    w_assign_file.close()
    likelihood_file.close()


if __name__ == "__main__":
    # define global variables

    if len(sys.argv) == 7:
        global_att.INITIAL_ALPHA = float(sys.argv[1])
        global_att.NTOPICS = int(sys.argv[2])
        file_utils.read_settings(sys.argv[3])
        # Rebind the module name 'corpus' to the loaded Corpus instance.
        corpus = corpus.Corpus(sys.argv[4])
        file_utils.make_directory(sys.argv[6])
        run_em(sys.argv[5], sys.argv[6], corpus)

    else:
        print("usage : lda_estimate [initial alpha] [k] [settings] "
              "[data] [random/seeded/*] [directory]")