def run_experiment_small_datasets(self):
    # load dataset (the data files live one directory up)
    curr_dir = os.getcwd()
    os.chdir("..")
    dataset, trn_splits, tst_splits = mydatasets.load_dataset(
        self.dataset_name)
    os.chdir(curr_dir)
    all_results = None
    # loop over splits, reps, trees
    for split_num in self.splits:
        trn_data, tst_data = mydatasets.get_small_dataset_split(
            dataset, trn_splits, tst_splits, split_num)
        x_mat, y_mat = mydatasets.get_arrays(trn_data)
        x_tst, y_tst = mydatasets.get_arrays(tst_data)
        for rep_num in range(self.num_repititions):
            df = self.run_basic_experiment(x_mat, y_mat, x_tst, y_tst)
            df.loc[0, "dataset"] = self.dataset_name
            df.loc[0, "split"] = split_num
            df.loc[0, "rep_num"] = rep_num
            if all_results is None:
                all_results = df
            else:
                # DataFrame.append was removed in pandas 2.0; concat instead
                # (assumes pandas is imported as pd)
                all_results = pd.concat([all_results, df], ignore_index=True)
            # checkpoint the accumulated results after every repetition
            all_results.to_csv(self.outfilename, index=False)
def __init__(self, dataset_name, splits, num_repititions, outfilename):
    assert dataset_name in ["mediamill", "delicious"]
    assert num_repititions > 0
    self.outfilename = outfilename
    self.num_repititions = num_repititions
    self.dataset_name = dataset_name
    self.splits = splits
    # load data (the data files live one directory up)
    curr_dir = os.getcwd()
    os.chdir("..")
    self.dataset, self.trn_splits, self.tst_splits = mydatasets.load_dataset(
        self.dataset_name)
    os.chdir(curr_dir)
    # static label tree arguments; mediamill has continuous features, so use
    # a Gaussian naive Bayes base classifier there and Bernoulli otherwise
    base = GaussianNB() if dataset_name == "mediamill" else BernoulliNB()
    self.static_args = {
        'stopping_condition': LeafSizeStoppingCondition(1),
        'leaf_classifier': OVAMultiLabelClassifier(base),
        'internal_classifier': OVAMultiLabelClassifier(base),
    }
    # partitioning methods to compare
    self.methods = [RandomPartitioner, KMeansPartitioner,
                    BalancedKMeansPartitioner]
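
# --- Usage sketch (not part of the original source) ---
# A minimal driver for the class above; the enclosing class statement is not
# shown in this excerpt, so the name SmallDatasetExperiment is an assumption.
exp = SmallDatasetExperiment(
    dataset_name="mediamill",   # must be "mediamill" or "delicious"
    splits=[0, 1, 2],           # train/test splits to run
    num_repititions=5,          # repetitions per split
    outfilename="results.csv",  # checkpointed after every repetition
)
exp.run_experiment_small_datasets()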
Example No. 3
import argparse

parser = argparse.ArgumentParser()
# Truncated in the original; the '-test' flag name below is an assumption.
parser.add_argument('-test', action='store_true', default=False,
                    help='train or test')
parser.add_argument('-class_num', type=int, default=10, help='number of classes')
args = parser.parse_args()

# load data
print("\nLoading data...")
# args.embed_num = 100
# cnn = model.CNN_Text(args)
# print(cnn)
# text_field = data.Field(lower=True)
# label_field = data.Field(sequential=False)
# train_iter, dev_iter = mr(text_field, label_field, device=-1, repeat=False)

text_field, label_field, train_iter, val_iter, test_iter = mydatasets.load_dataset(
    args)
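# NOTE: the fields now carry vocabularies built from the training split;
# len(text_field.vocab) is used below to size the embedding layer.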

# train_iter, dev_iter, test_iter = sst(text_field, label_field, device=-1, repeat=False)

# update args and print
args.embed_num = len(text_field.vocab)
print(label_field)
# print the configuration
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
cnn = model.CNN_Text(args)
if args.snapshot is not None:
    print('\nLoading model from {}...'.format(args.snapshot))
    cnn.load_state_dict(torch.load(args.snapshot))
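
# --- Sketch (not part of the original source) ---
# A checkpoint compatible with the load_state_dict call above can be written
# after training with torch.save; the path here is illustrative only:
#     torch.save(cnn.state_dict(), 'snapshot/best_model.pt')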
Example No. 4
df = df[cols]  # keep only the selected columns ('cols' is defined earlier, not shown)

# Path for saving model
PATH_OUTPUT = "best_CNNmodel/"
os.makedirs(PATH_OUTPUT, exist_ok=True)

# Some parameters
MODEL_TYPE = 'CNN'  # TODO: Change this to 'MLP', 'CNN', or 'RNN' according to your task
model = MyCNN()
save_file = 'modelCNN.pth'
NUM_EPOCHS = 10
BATCH_SIZE = 50
USE_CUDA = False  # Set 'True' if you want to use GPU
NUM_WORKERS = 0  # Number of threads used by DataLoader. You can adjust this according to your machine spec.

train_dataset, valid_dataset, test_dataset = load_dataset(df, "Frontal", MODEL_TYPE)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
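# Only the training loader shuffles; validation and test order stays fixed so
# evaluation metrics are comparable across epochs and runs.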


# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# use the GPU only when requested via USE_CUDA and actually available
device = torch.device("cuda" if USE_CUDA and torch.cuda.is_available() else "cpu")
model.to(device)
criterion.to(device)

best_val_acc = 0.0
train_losses, train_accuracies = [], []
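
# --- Training-loop sketch (not part of the original source) ---
# A minimal epoch loop consistent with the variables defined above; the
# original training code is not shown, so this is an assumed sketch.
for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss, correct, total = 0.0, 0, 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * labels.size(0)
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)
    train_losses.append(epoch_loss / total)
    train_accuracies.append(correct / total)

    # validation pass; no_grad skips building the autograd graph
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            preds = model(inputs).argmax(dim=1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)
    val_acc = val_correct / val_total
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # save the best model so far under PATH_OUTPUT
        torch.save(model.state_dict(), os.path.join(PATH_OUTPUT, save_file))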