Example No. 1
def phase_offset_set():
    '''Get the input of phase and offset'''
    global offsetEntry, phaseEntry

    try:
        offset_tmp = int(offsetEntry.get())
    except ValueError:
        offset_tmp = 0
        offsetEntry.delete(0, END)

    try:
        phase_tmp = float(phaseEntry.get())
    except ValueError:
        phase_tmp = 0
        phaseEntry.delete(0, END)

    if -np.pi <= phase_tmp <= np.pi and 0 <= offset_tmp < P.Params['osr']:
        P.Params['phase'] = phase_tmp
        P.Params['offset'] = offset_tmp
        P.offphaseReset()

    elif phase_tmp < -np.pi or phase_tmp > np.pi:
        phaseEntry.delete(0, END)
        phaseEntry.insert(0, P.Params['phase'])
        showerror('error', 'The phase is out of range! Please enter again.')
    else:
        offsetEntry.delete(0, END)
        offsetEntry.insert(0, P.Params['offset'])
        showerror('error', 'The offset is out of range! Please enter again.')
Example No. 2
 def AddParameterToModelFromIndex(self,index):
     """
     See AddParameterFromIndex. This does *not* update the view.
     
     Args:
         index: the in-range location of the data click
     Returns:
         wrapIdx, the index of the parameter (possibly an update)
     """
     n = self.CurrentX.size
     if index >= n:
         raise IndexError("No index {:d} in array of size {:d}".format(index, n))
     x = self.CurrentX[index]
     y = self.CurrentY[index]
     newDat = Parameter.ParameterData(index,x,y)
     ParamMetaList = self.ParamMeta.mParams
     nParams = len(ParamMetaList)
     if (nParams == 0):
         raise ValueError("Model instantiated without Parameters")
     wrapIdx = self.CurrentParamNum % nParams
     if (self.CurrentParamNum >= nParams):
         # then all the parameter slots are filled: 'wrap around' and overwrite
         self.CurrentParams[wrapIdx] = newDat
     else:
         # can simply add
         self.CurrentParams.append(newDat)
     # let the specific model know what parameters we just used.
     self.CurrentParamNum += 1
     self.ParameterMade(wrapIdx, self.CurrentParamNum % nParams)
     if (self.AutoUpdate and self.CurrentParamNum >= nParams):
         self.PushToDatabase()
     return wrapIdx
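
A standalone sketch of the wraparound bookkeeping above, using plain lists and hypothetical toy values: once more clicks arrive than there are parameter slots, each new datum overwrites the oldest slot.

params, n_params = [], 3
for click_num in range(5):
    wrap_idx = click_num % n_params
    if click_num >= n_params:
        params[wrap_idx] = click_num   # all slots filled: overwrite ('wraparound')
    else:
        params.append(click_num)       # still filling up
print(params)                          # [3, 4, 2]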
Example No. 3
def extractWeatherInfo(part_data, startNum=0, city_name=None):
    """
    从{startNum}开始抽取天气信息,晴或者多云或者小雨为0(转中雨不包含), 其他为1
    :param part_data:需要含有两列组成,一列为count,另一列为time,注意这里的数据是要按日期补全的数据,不然会有不对应的情况
    :param startNum: 前面跳过的样例数,也就是从第{startNum}开始生成
    :param city_name: 城市名字
    :return: ndarray,第一列为workOrWeekend,第2列为哪个样例的日期,0为work,1为weekend
    """
    if city_name is None:
        raise Exception("city name is None")
    if startNum < 0 or startNum >= part_data.shape[0]:
        raise Exception("parameter startNum error")
    weathers = []
    times = part_data["time"].values
    weather_ = Parameter.getWeather_info()
    weather_part = weather_[weather_.area == city_name][["date", "weather"]]
    for i in range(startNum, part_data.shape[0], 1):
        time = times[i]
        v = weather_part[weather_part.date == time.replace(
            "-", "/")]["weather"].values
        if (len(v) == 0 or pd.isnull(v[0])):
            v = 0
        else:
            v = v[0]
            if "云" in v or "晴" in v or ("小雨" in v and "阴" in v):
                v = 0
            else:
                v = 1
        weathers.append(v)
        weathers.append(time)

    weathers = np.reshape(weathers, (len(weathers) // 2, 2))
    return weathers
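
The final reshape relies on weathers holding interleaved (flag, date) pairs; a tiny standalone sketch of the same fold with dummy values:

import numpy as np

flat = [0, "2016-11-01", 1, "2016-11-02"]     # flag, date, flag, date, ...
print(np.reshape(flat, (len(flat) // 2, 2)))  # [['0' '2016-11-01']
                                              #  ['1' '2016-11-02']]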
Example No. 4
def test_parameter():
    p = Parameter("a", 1)
    assert p.name == "a"
    assert p.value == 1
    assert p.fixed is False
    assert p.max is None
    assert p.min is None
Example No. 5
    def is_done(self, job_id, url):
        """
        Check job is done
        :param job_id:
        :return:
        """
        para = Parameter.parameter(0, job_id)
        friday_run = para.is_finish()

        encode_str = DataEncoding.DataEncoding(json.dumps(friday_run))
        encode_body = encode_str.DesEncrypt()

        self.data["data"] = str(encode_body.replace(b'\n', b'').decode('utf-8'))

        operation = json.dumps(self.data)

        response = requests.post(url, headers=self.headers, timeout=3,
                                 data=operation)

        return_object = json.loads(json.loads(response.text)["string"])

        if int(str(return_object["errorCode"])) == 200:
            # print("job id :", job_id, "done is ", str(return_object["data"]["isDone"]))
            logger.info("job id " + ":" + str(job_id) + "---" + "done is " + str(return_object["data"]["isDone"]))
            return str(return_object["data"]["isDone"])
        else:
            return None
Example No. 6
    def is_error(self, job_id, url):
        """
        Check job is done
        :param job_id:
        :return:
        """
        para = Parameter.parameter(0, job_id)
        friday_run = para.is_error()

        encode_str = DataEncoding.DataEncoding(json.dumps(friday_run))
        encode_body = encode_str.DesEncrypt()

        self.data["data"] = str(encode_body.replace(b'\n', b'').decode('utf-8'))

        operation = json.dumps(self.data)

        response = requests.post(url, headers=self.headers, timeout=3,
                                 data=operation)

        return_object = json.loads(json.loads(response.text)["string"])

        # print(return_object)
        job_str = "job id: " + str(job_id)
        # print(str(job_str).center(50, '='))
        number = 0
        if int(str(return_object["errorCode"])) == 200:
            items = list(return_object["data"]["jobProcessedRecordExecResultList"])
            # print(str(items).center(50, '='))
            for item in items:
                if int(item["errorCode"]) != 200:
                    number += 1
                    logger.info(item)
                    # logger.info(str(item))
            logger.info((str(job_str) + ' error number: ' + str(number)).center(50, '='))
Example No. 7
    def run_ad(self, job_id, url, total_count):
        """
        Get job executing statistic result
        :param job_id:
        :return:
        """
        total_processed_count = 0
        para = Parameter.parameter(0, job_id)
        friday_run = para.go_on_ad_to_order()

        encode_str = DataEncoding.DataEncoding(json.dumps(friday_run))
        encode_body = encode_str.DesEncrypt()

        self.data["data"] = str(encode_body.replace(b'\n', b'').decode('utf-8'))

        operation = json.dumps(self.data)

        response = requests.post(url, headers=self.headers, timeout=3,
                                 data=operation)

        return_object = json.loads(json.loads(response.text)["string"])

        if int(str(return_object["errorCode"])) == 200:
            if return_object["data"]["totalAdCount"] is not None:
                total_ad_count = list(return_object["data"]["totalAdCount"])
                # print("*******************************************")
                # print(total_ad_count)
                for item in total_ad_count:
                    country_item = dict(item)
                    # print(country_item["countryCode"], country_item["totalProcessedCount"])
                    logger.info(country_item["countryCode"] + "---" + str(country_item["totalProcessedCount"]))
                    total_processed_count += int(country_item["totalProcessedCount"])

        # print("job id :", job_id, "total count :", total_processed_count)
        logger.info("job id:" + str(job_id) + " --- total count: " + str(total_count) + ", Run total count: " + str(total_processed_count))
Example No. 8
    def simulate(self, sim_length):
        """ simulate the patient over the specified simulation length """

        # random number generator for this patient
        self._rng = rndClasses.RNG(self._id)

        k = 0  # current time step

        # while the patient is alive and simulation length is not yet reached
        while self._stateMonitor.get_if_alive() and k * self._delta_t < sim_length:

            # find the transition probabilities of the future states
            trans_probs = self._param.get_transition_prob(
                self._stateMonitor.get_current_state())
            # create an empirical distribution
            empirical_dist = rndClasses.Empirical(trans_probs)
            # sample from the empirical distribution to get a new state
            # (returns an integer from {0, 1, 2, ...})
            new_state_index = empirical_dist.sample(self._rng)

            # update health state
            self._stateMonitor.update(k, P.HealthStats(new_state_index))

            # increment time step
            k += 1
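
For reference, a minimal standalone sketch of the sampling step above, assuming rndClasses.Empirical draws a state index in proportion to the given probabilities; this uses numpy alone:

import numpy as np

rng = np.random.default_rng(seed=1)      # per-patient generator
trans_probs = [0.7, 0.2, 0.1]            # P(next state | current state)
new_state_index = rng.choice(len(trans_probs), p=trans_probs)
print(new_state_index)                   # an integer from {0, 1, 2}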
Example No. 9
    def onAdd(self):
        if self.isFormValid():
            a = Parameter.createParameter()

            name = self.form.nameLineEdit.text()
            if name:
                a.Name = name

            a.ObjectLabel = str(self.form.objectComboBox.currentText())
            a.ObjectProperty = str(self.form.propertyComboBox.currentText())

            if self.form.minRangeCheckBox.isChecked():
                a.MinRangeEnabled = True
                a.MinRange = self.form.minRangeSpinBox.value()

            if self.form.maxRangeCheckBox.isChecked():
                a.MaxRangeEnabled = True
                a.MaxRange = self.form.maxRangeSpinBox.value()

            a.Value = self.form.valueSpinBox.value()

            # Reset default widget
            self.default()
        else:
            FreeCAD.Console.PrintError("Invalid data. Could not create parameter.\n")
Example No. 10
def setParamSpec():
    '''Set span and center frequency'''
    try:
        span_tmp = float(P.spanEntry.get())
    except ValueError:
        span_tmp = P.Params['span']
        P.spanEntry.delete(0, END)
    try:
        center_freq_tmp = float(P.CFEntry.get())
    except ValueError:
        center_freq_tmp = P.Params['center_freq']
        P.CFEntry.delete(0, END)

    if 0 < span_tmp <= 1920 and 70 <= center_freq_tmp <= 6000:
        if P.Params['center_freq'] != center_freq_tmp:
            P.Params['center_freq'] = round(center_freq_tmp, 4)  # keep at most 4 decimal places
            ParaSetCliSock.set_param('rx_freq', P.Params['center_freq'])

        P.Params['span'] = span_tmp
        P.cfspanReset()

        P.as_spec.set_xlim(P.Params['cf_start'], P.Params['cf_end'])
        P.as_spec.set_xticks(
            np.linspace(P.Params['cf_start'], P.Params['cf_end'], 11))
        P.as_spec.set_xticklabels([''] * 11)
        P.as_spec.grid(True, color='k', linewidth=1.5)

        # CF
        CF_label = ' CF: {}MHz'.format(P.Params['center_freq'])
        P.CFLabel['text'] = CF_label

        # span
        span_label = ' Span: {}kHz'.format(P.Params['span'])
        P.spanLabel['text'] = span_label
    elif span_tmp > 1920 or span_tmp <= 0:
        P.spanEntry.delete(0, END)
        P.spanEntry.insert(0, P.Params['span'])
        showerror('error', 'The span is out of range! Please enter again.')
    else:
        P.CFEntry.delete(0, END)
        P.CFEntry.insert(0, P.Params['center_freq'])
        showerror('error',
                  'The center frequency is out of range! Please enter again.')
Example No. 11
    def _setup(self):
        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        if issubclass(type(self.module), torch.nn.RNNBase):
            self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        for name_w in self.weights:
            print('Applying weight drop of {} to {}'.format(self.dropout, name_w))
            w = getattr(self.module, name_w)
            del self.module._parameters[name_w]
            self.module.register_parameter(name_w + '_raw', Parameter(w.data))
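
A minimal sketch of how the '_raw' parameters registered above are typically consumed, assuming the usual WeightDrop pattern (the forward-pass half is not shown in this example): before each forward call, dropout is applied to each raw weight and the result is bound back under the original attribute name, so the wrapped RNN computes with dropped weights.

import torch.nn.functional as F

def _setweights(module, weights, dropout, training):
    # hypothetical helper mirroring the registration in _setup above
    for name_w in weights:
        raw_w = getattr(module, name_w + '_raw')
        # drop elements of the raw weight, then expose the result under the
        # name the wrapped RNN expects (e.g. 'weight_hh_l0')
        setattr(module, name_w, F.dropout(raw_w, p=dropout, training=training))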
Example No. 12
 def cosmomc2april(self, line):
     plist = [
         Parameter("Obh2", line['omegabh2']),
         Parameter("Om", line['omegam*']),
         Parameter("h", line['H0*'] / 100.)
     ]
     if "nnu" in self.names:
         plist.append(Parameter("Nnu", line['nnu']))
     if "w" in self.names:
         plist.append(Parameter("w", line['w']))
     if "wa" in self.names:
         plist.append(Parameter("wa", line['wa']))
     if "omegak" in self.names:
         plist.append(Parameter("Ok", line['omegak']))
     if "mnu" in self.names:
         plist.append(Parameter("mnu", line['mnu']))
     return plist
Example No. 13
    def __init__(self, id, therapy):
        """ create a cohort of patients
        :param id: an integer to specify the seed of the random number generator
        """
        self._initial_pop_size = Data.POP_SIZE
        self._patients = []  # list of patients

        # populate the cohort
        for i in range(self._initial_pop_size):
            # create a new patient (use id * pop_size + i as patient id)
            #if Data.PSA_ON:
            #patient = Patient(id * self._initial_pop_size + i, P.ParametersProbabilistic(i, therapy))
            #else:
            patient = Patient(id * self._initial_pop_size + i,
                              P.ParametersFixed(therapy))
            # add the patient to the cohort
            self._patients.append(patient)
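
A tiny standalone check of the patient-id scheme above (hypothetical numbers): id * pop_size + i gives every patient in every cohort a distinct integer, so each patient seeds its own RNG without collisions.

pop_size = 3
seeds = [cohort_id * pop_size + i
         for cohort_id in range(2) for i in range(pop_size)]
print(seeds)  # [0, 1, 2, 3, 4, 5] -- unique across cohorts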
Example No. 14
	def __init__(self, joint_handles):
		# constants
		self.idle_wait_time = 4
		self.idle_gap = 3.0
		self.propagate_gap = 2.0
		self.light_duration = 4 # unit: second
		self.sma_duration = 4 # unit:second

		self.state = 0 # set it to idle at first
		self.active_list = []
		self.propagate_list = []
		self.idle_event_start_time = 0.0
		self.active_event_start_time = 0.0

		self.sma_handles = joint_handles
		self.parameter_list = []
		for handle in self.sma_handles:
			self.parameter_list.append(Parameter('sma'))
Example No. 15
 def fitparameter(self):
     """
     :rtype: Parameter
     """
     return Parameter(
         fmin=self.fmin.get_value(),
         fmax=self.fmax.get_value(),
         df=self.df.get_value(),
         raster=self.parser.getboolean(rst, 'Raster'),
         pixel=self.pixel.get_value_as_int(),
         dim=self.dim.get_value(),
         spektroskopie=self.parser.getboolean(man, 'Spektroskopie'),
         hysterese=self.parser.getboolean(man, 'Hysterese'),
         dcmin=self.parser.getfloat(man, 'Umin'),
         dcmax=self.parser.getfloat(man, 'Umax'),
         ddc=self.parser.getfloat(man, 'dU'),
         mittelungen=self.mittelungen.get_value_as_int(),
         amp_fitfkt=self.combobox('methode_amp').get_active(),
         ph_fitfkt=self.combobox('methode_phase').get_active(),
         filter_breite=self.spinbutton('savgol_koeff').get_value_as_int(),
         filter_ordnung=self.spinbutton('savgol_ordnung').get_value_as_int(),
         phase_versatz=self.spinbutton('phase_versatz').get_value(),
         bereich_min=self.bereich_min.get_value(),
         bereich_max=self.bereich_max.get_value(),
         amp=Fitparameter(
             guete_min=self.spinbutton('q_min').get_value(),
             guete_max=self.spinbutton('q_max').get_value(),
             off_min=self.spinbutton('off_min').get_value(),
             off_max=self.spinbutton('off_max').get_value()
         ),
         amp_min=self.spinbutton('amp_min').get_value(),
         amp_max=self.spinbutton('amp_max').get_value(),
         phase=Fitparameter(
             guete_min=self.spinbutton('phase_q_min').get_value(),
             guete_max=self.spinbutton('phase_q_max').get_value(),
             off_min=self.spinbutton('phase_off_min').get_value(),
             off_max=self.spinbutton('phase_off_max').get_value()
         ),
         konf=self.konf,
         kanal=self.kanal,
         version=self.version
     )
Example No. 16
    def fitparameter(self):
        """
        :rtype: Parameter
        """
        fmin = self.fmin.get_value()
        fmax = self.fmax.get_value()
        df = self.df.get_value()

        return Parameter(
            fmin=fmin,
            fmax=fmax,
            df=df,
            pixel=self.pixel.get_value_as_int(),
            dim=self.dim.get_value(),
            mittelungen=self.mittelungen.get_value_as_int(),
            amp_fitfkt=self.combobox('methode_amp').get_active(),
            ph_fitfkt=self.combobox('methode_phase').get_active(),
            filter_breite=self.spinbutton('savgol_koeff').get_value_as_int(),
            filter_ordnung=self.spinbutton(
                'savgol_ordnung').get_value_as_int(),
            phase_versatz=self.spinbutton('phase_versatz').get_value(),
            bereich_min=self.bereich_min.get_value(),
            bereich_max=self.bereich_max.get_value(),
            amp=Fitparameter(guete_min=self.spinbutton('q_min').get_value(),
                             guete_max=self.spinbutton('q_max').get_value(),
                             off_min=self.spinbutton('off_min').get_value(),
                             off_max=self.spinbutton('off_max').get_value()),
            amp_min=self.spinbutton('amp_min').get_value(),
            amp_max=self.spinbutton('amp_max').get_value(),
            phase=Fitparameter(
                guete_min=self.spinbutton('phase_q_min').get_value(),
                guete_max=self.spinbutton('phase_q_max').get_value(),
                off_min=self.spinbutton('phase_off_min').get_value(),
                off_max=self.spinbutton('phase_off_max').get_value()),
            konf=self.konf,
            version=self.version)
Example No. 17
def predictAllShop_ANN_part_together(all_data,
                                     trainAsTest=False,
                                     saveFilePath=None,
                                     featurePath=None,
                                     cate_level=0,
                                     cate_name=None,
                                     featureSavePath=None,
                                     needSaveFeature=False,
                                     time=1):
    """
    使用所有商家所有数据训练,预测所有商店
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param model: 某个模型
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param time:跑第几次
    :return:
    """

    ignores = 0

    shopids = None
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)

    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])
    weekOrWeekend = True
    day_back_num = 21
    sameday_backNum = 7
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []
    '''one-hot encode cate1'''
    cate = shop_info['cate1'].tolist()
    cate_dup = set(cate)
    cates = []
    for i in range(len(cate_dup)):
        cates.append([i])
    hot_encoder = OneHotEncoder().fit(cates)
    dicts = dict(zip(cate_dup, range(len(cate_dup))))
    cate_num = []
    for c in cate:
        cate_num.append([dicts[c]])
    '''cate1 one-hot encoding done'''

    if featurePath is None:

        all_x = None
        all_y = None
        for shopid in shopids:
            if shopid in Parameter.ignore_shopids:
                print "ignore get train", shopid
                ignores += 1
                continue
            print "get ", shopid, " train"
            part_data = all_data[all_data.shopid == shopid]
            last_14_real_y = None
            # hold out part of the data as the training set
            if trainAsTest:  # if the last 14 days serve as the test set, the training set is the earlier part
                last_14_real_y = part_data[len(part_data) -
                                           14:]["count"].values
                part_data = part_data[0:len(part_data) - 14]
            # print last_14_real_y
            skipNum = part_data.shape[0] - 128
            if skipNum < 0:
                skipNum = 0
            train_x = None
            if sameday_backNum != 0:
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:
                if train_x is not None:
                    train_x = np.concatenate(
                        (train_x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num, skipNum,
                                            nan_method_sameday_mean))),
                        axis=1)
                else:
                    train_x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num, skipNum,
                                       nan_method_sameday_mean))
            if weekOrWeekend:
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                hot_encoder = onehot(ws)
                train_x = np.concatenate(
                    (train_x, hot_encoder.transform(ws).toarray()), axis=1)
            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)

            # '''添加商家信息'''
            # # print train_x,train_x.shape
            # index = shopid - 1
            # oneshopinfo = shop_info.ix[index]
            # shop_perpay = oneshopinfo['perpay'] if not pd.isnull(oneshopinfo['perpay']) else 0
            # shop_score = oneshopinfo['score'] if not pd.isnull(oneshopinfo['score']) else 0
            # shop_comment = oneshopinfo['comment'] if not pd.isnull(oneshopinfo['comment']) else 0
            # shop_level = oneshopinfo['level'] if not pd.isnull(oneshopinfo['level']) else 0
            # shop_cate1 = oneshopinfo['cate1']
            # import warnings
            # with warnings.catch_warnings():
            #     warnings.simplefilter("ignore",category=DeprecationWarning)
            #     shop_cate1_encoder = hot_encoder.transform([dicts[shop_cate1]]).toarray()
            # train_x = np.insert(train_x,train_x.shape[1],shop_perpay,axis=1)
            # train_x = np.insert(train_x,train_x.shape[1],shop_score,axis=1)
            # train_x = np.insert(train_x,train_x.shape[1],shop_comment,axis=1)
            # train_x = np.insert(train_x,train_x.shape[1],shop_level,axis=1)
            # for i in range(shop_cate1_encoder.shape[1]):
            #     train_x = np.insert(train_x,train_x.shape[1],shop_cate1_encoder[0][i],axis=1)
            # '''商家信息添加完毕'''

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

                # '''添加周几'''
                # extract_weekday = getOneWeekdayFomExtractedData(extractWeekday(part_data, skipNum))
                # train_x = np.append(train_x, extract_weekday, axis=1)
                # ''''''

                # train_x = train_x.reshape((train_x.shape[0],
                #                            train_x.shape[1], 1))
                # print model.get_weights()
                # part_counts = []
                # for i in range(7):
                #     weekday = i + 1
                #     part_count = getOneWeekdayFomExtractedData(count, weekday)
                #     part_counts.append(part_count)

        train_x = all_x
        train_y = all_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(
                featureAndLabel,
                columns=[
                    "sameday1", "sameday2", "sameday3", "week_mean1",
                    "week_mean2", "week_mean3", "week_median1", "week_median2",
                    "week_median3", "perpay", "score", "comment", "level",
                    "cate1_1", "cate1_2", "cate1_3", "cate1_4", "cate1_5",
                    "cate1_6", "label"
                ])
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = "train_feature/%df_%d_%s.csv" % (
                        flDF.shape[1] - 1, cate_level, cate_name)
                else:
                    featureSavePath = "feature/%df_%d_%s.csv" % (
                        flDF.shape[1] - 1, cate_level, cate_name)
            flDF.to_csv(featureSavePath)
    else:  # a featurePath file was supplied
        flDF = pd.read_csv(featurePath, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''normalize the data'''
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)
    '''normalization done'''
    '''build the neural network'''
    h1_activation = "relu"
    rnn_epoch = 60
    verbose = 0
    h_unit = 16
    batch_size = 5
    np.random.seed(128)
    model = Sequential()
    model.add(
        Dense(h_unit,
              init="normal",
              input_dim=train_x.shape[1],
              activation=h1_activation))  #sigmoid
    model.add(
        Dense(1,
              init="normal",
              activation='linear',
              activity_regularizer=activity_l2(0.01)))
    sgd = SGD(0.005)
    # rmsprop = RMSprop(0.01)
    # adagrad = Adagrad(0.05)
    adadelta = Adadelta(0.01)
    adam = Adam(0.0001)
    adamax = Adamax(0.01)
    nadam = Nadam(0.01)
    model.compile(loss="mse", optimizer=adam)
    '''network built'''

    model.fit(train_x,
              train_y,
              nb_epoch=rnn_epoch,
              batch_size=batch_size,
              verbose=verbose)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict every shop'''
    preficts_all = None
    real_all = None
    for j in shopids:
        if j in Parameter.ignore_shopids:
            print "ignore predict", j
            continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  # if the last 14 days serve as the test set, the training set is the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict 14 days'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            # use the same weekday's values from the previous {sameday_backNum} weeks as prediction features
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                if x is None:
                    x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
                else:
                    x = np.concatenate(
                        (x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num,
                                            part_data.shape[0] - 1,
                                            nan_method_sameday_mean))),
                        axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(
                                 part_data,
                                 part_data.shape[0] - 1))).toarray()),
                    axis=1)
            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum,
                                         part_data.shape[0] - 1,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)
            # '''添加周几'''
            # x = np.append(x, getOneWeekdayFomExtractedData(extractWeekday(part_data, part_data.shape[0]-1)), axis=1)
            # ''''''

            # '''添加商家信息'''
            # index = j - 1
            # oneshopinfo = shop_info.ix[index]
            # shop_perpay = oneshopinfo['perpay'] if not pd.isnull(oneshopinfo['perpay']) else 0
            # shop_score = oneshopinfo['score'] if not pd.isnull(oneshopinfo['score']) else 0
            # shop_comment = oneshopinfo['comment'] if not pd.isnull(oneshopinfo['comment']) else 0
            # shop_level = oneshopinfo['level'] if not pd.isnull(oneshopinfo['level']) else 0
            # shop_cate1 = oneshopinfo['cate1']
            # import warnings
            # with warnings.catch_warnings():
            #     warnings.simplefilter("ignore",category=DeprecationWarning)
            #     shop_cate1_encoder = hot_encoder.transform([dicts[shop_cate1]]).toarray()
            # x = np.insert(x,x.shape[1],shop_perpay,axis=1)
            # x = np.insert(x,x.shape[1],shop_score,axis=1)
            # x = np.insert(x,x.shape[1],shop_comment,axis=1)
            # x = np.insert(x,x.shape[1],shop_level,axis=1)
            # for i in range(shop_cate1_encoder.shape[1]):
            #     x = np.insert(x,x.shape[1],shop_cate1_encoder[0][i],axis=1)
            # '''商家信息添加完毕'''

            x = x_scaler.transform(x)
            # for j in range(sameday_backNum):
            #     x.append(train_y[len(train_y) - (j+1)*7][0])
            # x = np.array(x).reshape((1, sameday_backNum))

            # print x
            # x = x.reshape(1, sameday_backNum, 1)
            predict = model.predict(x)
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]

            if predict <= 0:
                predict = 1
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
    preficts_all = preficts_all.reshape((len(shopids) - ignores, 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids) - ignores, 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    shopids = shopids.tolist()
    for remove in Parameter.ignore_shopids:
        try:
            shopids.remove(remove)
        except:
            pass
    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        path = saveFilePath + "_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dtime.csv" \
                              % (sameday_backNum, day_back_num, train_x.shape[1],cate_level,cate_name
                                 ,rnn_epoch,batch_size,h_unit,h1_activation,time)
        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
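
The prediction loop above appends a placeholder day, builds that day's features from the growing history, predicts, and writes the prediction back so the next day can use it. A condensed standalone sketch of that rolling-forecast idea, where model_predict and make_features are hypothetical stand-ins rather than functions from this code:

import numpy as np

def rolling_forecast(history, model_predict, make_features, horizon=14):
    history = list(history)
    preds = []
    for _ in range(horizon):
        x = make_features(history)   # features come from the growing history
        y_hat = model_predict(x)
        preds.append(y_hat)
        history.append(y_hat)        # feed the prediction back in
    return np.array(preds)

# toy usage: the "model" just repeats the mean of the last 7 days
print(rolling_forecast(range(30),
                       model_predict=lambda x: float(np.mean(x)),
                       make_features=lambda h: h[-7:]))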
Example No. 18
def Main(contigfile_, tuple_of_bamfiles, tuple_of_means, tuple_of_thresholds, edge_support, read_len, cont_threshold,
          ratio, output_dest, std_dev, covcutoff, haplratio, haplthreshold, detect_haplotype, detect_duplicate, gff_file, fosmidpool, mapquality):

    from time import time
    import os
    import CreateGraph as CG
    import MakeScaffolds as MS
    import GenerateOutput as GO
    import Parameter
    from copy import deepcopy
    tot_start = time()
    F = [] #list of (ordered) lists of tuples containing (contig_name, direction, position, length, links). The tuple is a contig within a scaffold and the list of tuples is the scaffold.
    Scaffolds = {}     #scaffold dict with contig objects for easy fetching of all contigs in a scaffold
    n = len(tuple_of_bamfiles) # number of libraries we have
    param = Parameter.parameter() # object containing all parameters (user specified, defaulted, and computed along the way)
    param.scaffold_indexer = 1 # global indicator for scaffolds, used to index scaffolds when they are created
    param.map_quality = mapquality
    param.rel_weight = ratio
    Contigs = {} # contig dict that stores contig objects

    if not os.path.exists(output_dest):
        os.makedirs(output_dest)
    param.information_file = open(os.path.join(output_dest, 'Statistics.txt'), 'w')

    Information = param.information_file
    open(output_dest + '/haplotypes.fa', 'w')
    #Read in the sequences of the contigs in memory
    contigfile = open(contigfile_, 'r')
    C_dict = ReadInContigseqs(contigfile)
    #C_dict = {}
    param.gff_file = gff_file
#iterate over libraries
    param.first_lib = True
    for i in range(0, n):
        start = time()
        param.bamfile = tuple_of_bamfiles[i]
        param.mean_ins_size = tuple_of_means[i]
        param.ins_size_threshold = tuple_of_thresholds[i]
        param.edgesupport = edge_support[i]
        param.read_len = read_len[i]
        param.output_directory = output_dest
        param.std_dev_ins_size = std_dev[i]
        param.contig_threshold = cont_threshold[i]
        param.cov_cutoff = covcutoff[i]
        param.hapl_ratio = haplratio
        param.hapl_threshold = haplthreshold
        param.detect_haplotype = detect_haplotype
        param.detect_duplicate = detect_duplicate
        param.fosmidpool = fosmidpool
        print >> Information, '\nPASS ' + str(i + 1) + '\n\n'
        print 'Starting scaffolding with library: ', param.bamfile
        (G, Contigs, Scaffolds, F, param) = CG.PE(Contigs, Scaffolds, F, Information, output_dest, C_dict, param)      #Create graph, single out too short contigs/scaffolds and store them in F
        param.first_lib = False   #not the first lib any more
        if G is None:
            print '0 contigs/super-contigs passed the length criteria of this step. Exiting and printing results.. '
            break
        elapsed = time() - start
        print >> Information, 'Time elapsed for creating graph, iteration ' + str(i) + ': ' + str(elapsed) + '\n'
        start = time()
        (Contigs, Scaffolds, F, param) = MS.Algorithm(G, Contigs, Scaffolds, F, Information, C_dict, param)   # Make scaffolds, store the complex areas (consisting of contig/scaffold) in F, store the created scaffolds in Scaffolds, update Contigs
        elapsed = time() - start
        print >> Information, 'Time elapsed for making scaffolds, iteration ' + str(i) + ': ' + str(elapsed) + '\n'

        print 'Writing out scaffolding results for step', i + 1, ' ...'
        Scaffolds_copy = deepcopy(Scaffolds)
        Contigs_copy = deepcopy(Contigs)
        F_copy = deepcopy(F)
        for scaffold_ in Scaffolds_copy.keys(): #iterate over keys in hash, so that we can remove keys while iterating over it
            ###  Go to function and print to F
            ### Remove Scaf_obj from Scaffolds and Contig_obj from contigs
            S_obj = Scaffolds_copy[scaffold_]
            list_of_contigs = S_obj.contigs   #list of contig objects contained in scaffold object
            Contigs_copy, F_copy = GO.WriteToF(F_copy, Contigs_copy, list_of_contigs)
            del Scaffolds_copy[scaffold_]
        #print F
        GO.PrintOutput(F_copy, C_dict, Information, output_dest, param, i + 1)

    ### Calculate stats for last scaffolding step    
    scaf_lengths = [Scaffolds[scaffold_].s_length for scaffold_ in Scaffolds.keys()]
    sorted_lengths = sorted(scaf_lengths, reverse=True)
    N50, L50 = CG.CalculateStats(sorted_lengths, param)
    param.current_L50 = L50
    param.current_N50 = N50
#    ### Call a print scaffolds function here for remaining scaffolds that has "passed" all library levels
#    for scaffold_ in Scaffolds.keys(): #iterate over keys in hash, so that we can remove keys while iterating over it
#        ###  Go to function and print to F
#        ### Remove Scaf_obj from Scaffolds and Contig_obj from contigs
#        S_obj=Scaffolds[scaffold_]
#        list_of_contigs=S_obj.contigs   #list of contig objects contained in scaffold object
#        Contigs, F = GO.WriteToF(F,Contigs,list_of_contigs)
#        del Scaffolds[scaffold_]
#    #print F
#    GO.PrintOutput(F,C_dict,Information,output_dest)

    elapsed = time() - tot_start
    print >> Information, 'Total time for scaffolding: ' + str(elapsed) + '\n'
    print 'Finished\n\n '
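
For reference, a minimal standalone sketch of the N50/L50 statistics computed by CG.CalculateStats above (the standard definitions, not that function's actual code): N50 is the scaffold length at which the running sum over the descending-sorted lengths first reaches half the total, and L50 is how many scaffolds that takes.

def n50_l50(sorted_lengths):
    # expects lengths sorted in descending order, as in Main above
    half_total = sum(sorted_lengths) / 2.0
    running = 0
    for count, length in enumerate(sorted_lengths, start=1):
        running += length
        if running >= half_total:
            return length, count

print(n50_l50([100, 80, 60, 40, 20]))  # (80, 2)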
Example No. 19
__author__ = "github.com/wardsimon"
__version__ = "0.0.1"

import numpy as np

# NOTE: a hyphenated name is not importable as-is; the package is assumed
# to be importable as curly_telegram.
from curly_telegram.interface import Interface, calculators_list
from curly_telegram.Objects.fitting import Model, Parameter


x = np.linspace(0, 10, 100)
y = 3.0 * x + 2.0 + np.random.normal(-1.0, 1.0, len(x))

p1 = Parameter("m", 1.5)
p2 = Parameter("c", 0.5)

f = lambda x, m, c: m * x + c  # noqa: E731
m = Model(f, [p1, p2])

interface = Interface(model=m)
interface.x = x
interface.y = y
interface.ftol = 1e-4
interface.set_calculator("scipy")
interface.fit()
interface.plot()

for calc in calculators_list:
    interface.set_calculator(calc.name)
    interface.fit()
    interface.plot()
Example No. 20
def predictAllShop_MultiCNN_HPS(all_data,
                                trainAsTest=False,
                                saveFilePath=None,
                                featurePath=None,
                                cate_level=0,
                                cate_name=None,
                                featureSavePath=None,
                                needSaveFeature=False,
                                ignore_shopids=[],
                                needCV=False,
                                model_path=None,
                                Augmented=False,
                                ignore_get_train=True,
                                ignore_predict=True,
                                addNoiseInResult=False,
                                time=1):
    """
    通过gridsearch找超参数
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param ignore_shopids:
    :param create_model_function:
    :param needCV
    :param Augmented:是否增广样本
    :param  ignore_get_train:是否忽略获取样本
    :param ignore_predict:是否忽略预测
    :return:
    """

    augument_time = 1
    verbose = 2
    last_N_days = 70
    # count of shops skipped so far
    # ignores = 0
    shop_need_to_predict = 2000
    if cate_level == 0:
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 8
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []
    # weekend-or-not hot_encoder
    hot_encoder = onehot([[1], [0]])
    # cate1 hot_encoder
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    if featurePath is None:
        train_x, train_y = getTrainXY(
            all_data, cate1_hot_encoder, cate1_label_encoder, day_back_num,
            hot_encoder, ignore_get_train, ignore_shopids, last_N_days,
            other_features, 0, shop_features, shop_info, shopids, trainAsTest,
            weather, weekOrWeekend, week_backnum)
        train_x2 = getTrainXY(all_data, cate1_hot_encoder, cate1_label_encoder,
                              0, hot_encoder, ignore_get_train, ignore_shopids,
                              last_N_days, other_features, sameday_backNum,
                              shop_features, shop_info, shopids, trainAsTest,
                              weather, weekOrWeekend, week_backnum)[0]
        """增广训练集"""
        if Augmented:
            train_xs, train_y = augmentTrainX(augument_time,
                                              [train_x, train_x2], train_y)

        train_x = train_xs[0]
        train_x2 = train_xs[1]

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = Parameter.projectPath + "lzj/train_feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                else:
                    featureSavePath = Parameter.projectPath + "lzj/feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            if Augmented:
                featureSavePath += ("_Augment%d" % augument_time)

            featureSavePath += ".csv"
            print "save feature in :", featureSavePath
            flDF.to_csv(featureSavePath)
    else:  # a featurePath file was supplied
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''normalize the data'''
    x_scaler = MinMaxScaler().fit(train_x)
    x2_scaler = MinMaxScaler().fit(train_x2)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_x2 = x2_scaler.transform(train_x2)
    train_y = y_scaler.transform(train_y)
    '''normalization done'''
    """CNN"""
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    train_x2 = np.reshape(train_x2, (train_x2.shape[0], train_x2.shape[1], 1))

    if model_path is None:
        if needCV:
            '''gridsearchCV'''
            # nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose
            # input_dim, h1_unit = 16, optimizer = "adagrad", init = "normal"):
            input_dim = [(train_x.shape[1], train_x.shape[2])]
            input_dim2 = [(train_x2.shape[1], train_x2.shape[2])]
            h1_activation = ["relu"]
            h1_unit = [8, 12, 16, 20]
            model = KerasRegressor(build_fn=create_model_MultiCNN,
                                   verbose=verbose)
            batch_size = [3, 5, 7, 10]
            epochs = [10, 15, 20, 25, 30]
            param_grid = dict(batch_size=batch_size,
                              nb_epoch=epochs,
                              h1_unit=h1_unit,
                              input_shape1=input_dim,
                              input_shape2=input_dim2)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)

            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))

        if not needCV:
            input_dim = (train_x.shape[1], train_x.shape[2])
            input_dim2 = (train_x2.shape[1], train_x2.shape[2])
            # h1_unit = 16 + (time) * 4
            h1_unit = 24
            h1_activation = "relu"
            batch_size = 3
            epochs = 40

        else:
            input_dim = (train_x.shape[1], train_x.shape[2])
            input_dim2 = (train_x2.shape[1], train_x2.shape[2])
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"

        print train_x.shape
        print train_x2.shape
        print train_y.shape

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_MultiCNN(input_shape1=input_dim,
                                           input_shape2=input_dim2,
                                           h1_unit=h1_unit,
                                           h1_activation=h1_activation)
        hist = best_model.fit([train_x, train_x2],
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print hist.history

        # save the model
        if trainAsTest:
            model_save_path = Parameter.projectPath+"lzj/train_model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 , epochs, batch_size, h1_unit, h1_activation)
            saveModel(model_save_path, best_model)
        else:
            model_save_path = Parameter.projectPath+"lzj/model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 ,  epochs, batch_size, h1_unit, h1_activation)
            saveModel(model_save_path, best_model)
    else:  #model_path is not none
        print "get model from " + model_path
        best_model = getModel(model_path)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict shops'''
    model = best_model
    preficts_all = None
    real_all = None

    for j in shopids:
        if ignore_predict:
            if j in ignore_shopids:
                print "ignore predict", j
                # ignores += 1
                continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  # if the last 14 days serve as the test set, the training set is the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict 14 days'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            # use the same weekday's values from the previous {sameday_backNum} weeks as prediction features
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            x2 = None
            x = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder,
                                  day_back_num, hot_encoder, other_features,
                                  part_data, 0, shop_features, shop_info, j,
                                  part_data.shape[0] - 1, x, weather,
                                  weekOrWeekend, week_backnum)[0]
            x2 = getOneShopTrainXY(cate1_hot_encoder, cate1_label_encoder, 0,
                                   hot_encoder, other_features, part_data,
                                   sameday_backNum, shop_features, shop_info,
                                   j, part_data.shape[0] - 1, x2, weather,
                                   weekOrWeekend, week_backnum)[0]

            x = x_scaler.transform(x)
            x2 = x2_scaler.transform(x2)
            """CNN"""
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            x2 = np.reshape(x2, (x2.shape[0], x2.shape[1], 1))
            predict = model.predict([x, x2])
            '''restore y to the original scale'''
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]
            '''y restored'''
            # print predict
            if predict <= 0:
                predict = 0
            if addNoiseInResult:
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except:
                pass

    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)

    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                     ,  epochs, batch_size, h1_unit, h1_activation,len(shopids))
        else:
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+).json"""
            )
            m = r.search(model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (int(m.group(1)),int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5)), m.group(6)
                                     ,  int(m.group(7)), int(m.group(8)), int(m.group(9)), m.group(10),len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        path = path + "_%dtime" % time
        if trainAsTest:
            path = path + "_train"
        path += ".csv"

        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
Example No. 21
                        'K_MIN': symbolset.ix[i, 'K_MIN'],
                        'startdate': symbolset.ix[i, 'startdate'],
                        'enddate': symbolset.ix[i, 'enddate'],
                        'result_para_dic': Parameter.result_para_dic
                    }
                    forward_mode_dic = {}
                    for k, v in Parameter.forward_mode_para_dic.items():
                        enable = symbolset.ix[i, k]
                        if enable:
                            sub_stop_loss_dic = {}
                            for k1 in v.keys():
                                if k1 == k:
                                    sub_stop_loss_dic[k1] = True
                                else:
                                    sub_stop_loss_dic[
                                        k1] = Parameter.para_str_to_float(
                                            symbolset.ix[i, k1])
                            forward_mode_dic[k] = sub_stop_loss_dic
                    symbol_para_dic['forward_mode_dic'] = forward_mode_dic
                    strategy_forward_para.append(symbol_para_dic)
                strategyParameterSet[strategy_name] = strategy_forward_para

    for strategy_name, strategy_bt_parameter in strategyParameterSet.items():
        strategy_folder = "%s%s\\" % (Parameter.root_path, strategy_name)
        for strategyParameter in strategy_bt_parameter:

            strategy_name = strategyParameter['strategy_name']
            exchange_id = strategyParameter['exchange_id']
            sec_id = strategyParameter['sec_id']
            bar_type = strategyParameter['K_MIN']
            startdate = strategyParameter['startdate']
            enddate = strategyParameter['enddate']
Example No. 22
def predictAllShop_LC_HPS(all_data,
                          trainAsTest=False,
                          saveFilePath=None,
                          featurePath=None,
                          cate_level=0,
                          cate_name=None,
                          featureSavePath=None,
                          needSaveFeature=False,
                          ignore_shopids=[],
                          needCV=False,
                          model_path=None,
                          Augmented=False,
                          ignore_get_train=True,
                          ignore_predict=True,
                          addNoiseInResult=False,
                          time=1):
    """
    通过gridsearch找超参数
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param ignore_shopids:
    :param create_model_function:
    :param needCV
    :param Augmented:是否增广样本
    :param  ignore_get_train:是否忽略获取样本
    :param ignore_predict:是否忽略预测
    :return:
    """

    augument_time = 1
    verbose = 2
    last_N_days = 60
    #number of shops that have been skipped
    # ignores = 0
    shop_need_to_predict = 2000
    if (cate_level == 0):
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    weather = False
    weekOrWeekend = False
    day_back_num = 21
    sameday_backNum = 0
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []
    #one-hot encoder for the is-weekend flag
    hot_encoder = onehot([[1], [0]])
    #one-hot encoder for category level 1 (cate1)
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if ignore_get_train:
                if shopid in ignore_shopids:
                    print "ignore get train", shopid
                    continue
            print "get ", shopid, " train"
            part_data = all_data[all_data.shopid == shopid]
            last_14_real_y = None
            # carve out part of the data as the training set
            if trainAsTest:  #when the last 14 days act as the test set, train on the earlier part
                last_14_real_y = part_data[len(part_data) -
                                           14:]["count"].values
                part_data = part_data[0:len(part_data) - 14]
            # print last_14_real_y
            '''decide how many leading days of data to skip'''
            skipNum = part_data.shape[0] - last_N_days
            if skipNum < 0:
                skipNum = 0
            train_x = None
            '''extract features'''
            if sameday_backNum != 0:  #sameday
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:  #day
                if train_x is not None:
                    train_x = np.concatenate(
                        (train_x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num, skipNum,
                                            nan_method_sameday_mean))),
                        axis=1)
                else:
                    train_x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num, skipNum,
                                       nan_method_sameday_mean))
            if weekOrWeekend:  #weekOrWeekend
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                train_x = np.concatenate((train_x, hot_encoder.transform(ws)),
                                         axis=1)

            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)
            '''append shop information features'''
            # print train_x,train_x.shape
            index = shopid - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo['cityname']
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            shop_cate1 = oneshopinfo['cate1']
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=DeprecationWarning)
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]))
            if "perpay" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_perpay,
                                    axis=1)
            if "score" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_score,
                                    axis=1)
            if "comment" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_comment,
                                    axis=1)
            if "level" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_level,
                                    axis=1)
            if "cate1" in shop_features:
                for i in range(shop_cate1_encoder.shape[1]):
                    train_x = np.insert(train_x,
                                        train_x.shape[1],
                                        shop_cate1_encoder[0][i],
                                        axis=1)
            '''shop information features added'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, skipNum, shop_city))
                train_x = np.append(train_x, weathers, axis=1)
            '''end of weather features'''

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

                # '''append weekday feature'''
                # extract_weekday = getOneWeekdayFomExtractedData(extractWeekday(part_data, skipNum))
                # train_x = np.append(train_x, extract_weekday, axis=1)
                # ''''''

                # train_x = train_x.reshape((train_x.shape[0],
                #                            train_x.shape[1], 1))
                # print model.get_weights()
                # part_counts = []
                # for i in range(7):
                #     weekday = i + 1
                #     part_count = getOneWeekdayFomExtractedData(count, weekday)
                #     part_counts.append(part_count)

        train_x = all_x
        train_y = all_y
        """增广训练集"""
        if Augmented:
            print "augment data"
            new_train_x = np.ndarray(
                (train_x.shape[0] * (augument_time + 1), train_x.shape[1]))
            new_train_y = np.ndarray(
                (train_y.shape[0] * (augument_time + 1), train_y.shape[1]))

            def augument_relu(v):  # Gaussian augmentation... seems ineffective and can distort samples badly
                return v * (1 + 0.01 * np.random.normal())

            def augument_relu2(v):
                return v * 1.05
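            # Worked example of the two schemes: for a label y = 100,
            # augument_relu returns 100 * (1 + 0.01 * N(0, 1)) -- typically
            # between ~98 and ~102 -- while augument_relu2 always returns 105.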

            end = train_x.shape[0]
            for index in range(end):
                new_train_x[index] = train_x[index]
                new_train_y[index] = train_y[index]
            sert_index = end  # continue inserting right after the copied originals
            for index in range(end):
                print "%d / %d" % (index, end)
                for t in range(augument_time):
                    new_train_x[sert_index] = train_x[index]
                    # train_x = np.concatenate((train_x, [train_x[index]]), axis=0)
                    # print train_x
                    ov = train_y[index][0]
                    # train_y = np.concatenate((train_y, [[augument_relu(ov)]]), axis=0)
                    new_train_y[sert_index] = [augument_relu2(ov)]
                    sert_index += 1
                    # print train_y
            print "augment finish"
            train_x = new_train_x
            train_y = new_train_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = Parameter.projectPath + "lzj/train_feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                else:
                    featureSavePath = Parameter.projectPath + "lzj/feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            if Augmented:
                featureSavePath += ("_Augment%d" % augument_time)

            featureSavePath += ".csv"
            print "save feature in :", featureSavePath
            flDF.to_csv(featureSavePath)
    else:  #a featurePath file was supplied
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''normalize features and labels'''
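    # MinMaxScaler maps each column onto [0, 1]; the fitted y_scaler is used
    # later to inverse-transform predictions back to the original count scale.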
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)
    '''end of normalization'''
    """CNN"""
    train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))

    if model_path is None:
        if needCV:
            '''gridsearchCV'''
            # nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose
            # input_dim, h1_unit = 16, optimizer = "adagrad", init = "normal"):
            input_dim = [(train_x.shape[1], train_x.shape[2])]
            h1_acqtivation = ["relu"]
            h1_unit = [8, 12, 16, 20]
            model = KerasRegressor(build_fn=create_model_LocallyConnected,
                                   verbose=verbose)
            batch_size = [3, 5, 7, 10]
            epochs = [10, 15, 20, 25, 30]
            param_grid = dict(batch_size=batch_size,
                              nb_epoch=epochs,
                              h1_unit=h1_unit,
                              input_shape=input_dim)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)

            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
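            # note: grid_scores_ is the pre-0.20 scikit-learn API; on newer
            # versions the same information is exposed via grid.cv_results_
            # (e.g. cv_results_["mean_test_score"] and ["params"]).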

        if not needCV:
            input_dim = (train_x.shape[1], train_x.shape[2])
            h1_unit = 16 + (time) * 4
            h1_activation = "sigmoid"
            batch_size = 3
            epochs = 40

        else:
            input_dim = (train_x.shape[1], train_x.shape[2])
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h1_activation = "sigmoid"

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model_LocallyConnected(input_shape=input_dim,
                                                   h1_unit=h1_unit,
                                                   h1_activation=h1_activation)
        hist = best_model.fit(train_x,
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print hist.history

        #save the model
        if trainAsTest:
            model_save_path = Parameter.projectPath+"lzj/train_model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 , epochs, batch_size, h1_unit, h1_activation)
            saveModel(model_save_path, best_model)
        else:
            model_save_path = Parameter.projectPath+"lzj/model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 ,  epochs, batch_size, h1_unit, h1_activation)
            saveModel(model_save_path, best_model)
    else:  #model_path is not none
        print "get model from " + model_path
        best_model = getModel(model_path)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict for each shop'''
    model = best_model
    preficts_all = None
    real_all = None

    for j in shopids:
        if ignore_predict:
            if j in ignore_shopids:
                print "ignore predict", j
                # ignores += 1
                continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  #when the last 14 days act as the test set, train on the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict the next 14 days'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            #use the same weekday's values from the previous {sameday_backNum} weeks as features
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                if x is None:
                    x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
                else:
                    x = np.concatenate(
                        (x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num,
                                            part_data.shape[0] - 1,
                                            nan_method_sameday_mean))),
                        axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(part_data,
                                                  part_data.shape[0] - 1)))),
                    axis=1)

            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum,
                                         part_data.shape[0] - 1,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)
            # '''append weekday feature'''
            # x = np.append(x, getOneWeekdayFomExtractedData(extractWeekday(part_data, part_data.shape[0]-1)), axis=1)
            # ''''''
            '''append shop information features'''
            index = j - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo["cityname"]
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            if "perpay" in shop_features:
                x = np.insert(x, x.shape[1], shop_perpay, axis=1)
            if "score" in shop_features:
                x = np.insert(x, x.shape[1], shop_score, axis=1)
            if "comment" in shop_features:
                x = np.insert(x, x.shape[1], shop_comment, axis=1)
            if "level" in shop_features:
                x = np.insert(x, x.shape[1], shop_level, axis=1)
            shop_cate1 = oneshopinfo['cate1']
            if "cate1" in shop_features:
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]).reshape(
                        (-1, 1)))
                for i in range(shop_cate1_encoder.shape[1]):
                    x = np.insert(x,
                                  x.shape[1],
                                  shop_cate1_encoder[0][i],
                                  axis=1)
            '''shop information features added'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, part_data.shape[0] - 1,
                                       shop_city))
                x = np.append(x, weathers, axis=1)
            '''end of weather features'''
            # for j in range(sameday_backNum):
            #     x.append(train_y[len(train_y) - (j+1)*7][0])
            # x = np.array(x).reshape((1, sameday_backNum))
            x = x_scaler.transform(x)
            """CNN"""
            x = np.reshape(x, (x.shape[0], x.shape[1], 1))
            predict = model.predict(x)
            '''map y back to the original scale'''
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]
            '''end of inverse transform'''
            # print predict
            if (predict <= 0):
                predict = 0  # clip negative predictions
            if addNoiseInResult:
                predict = predict * (
                    1 + 0.05 * abs(np.random.normal(scale=(i + 1) * 0.05)))
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
    shopids = shopids.tolist()
    if ignore_predict:
        for remove_id in ignore_shopids:
            try:
                shopids.remove(remove_id)
            except:
                pass

    preficts_all = preficts_all.reshape((len(shopids), 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids), 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)

    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        if model_path is None:
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                     ,  epochs, batch_size, h1_unit, h1_activation,len(shopids))
        else:
            import re
            r = re.compile(
                r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+).json"""
            )
            m = r.search(model_path)
            path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%dshops" \
                                  % (int(m.group(1)),int(m.group(2)), int(m.group(3)), int(m.group(4)), int(m.group(5)), m.group(6)
                                     ,  int(m.group(7)), int(m.group(8)), int(m.group(9)), m.group(10),len(shopids))
        if Augmented:
            path += "_augmented"
        if addNoiseInResult:
            path += "_addNoiseInResult"
        if trainAsTest:
            path = path + "_train"
        path = path + "_%dtime.csv" % time

        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
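# A sketch of the filename round-trip used above: hyper-parameters are packed
# into the model filename with "%d..."-style formatting, and the regex recovers
# them when only model_path is given (the fname value below is made up).
import re

fname = "/60last_0s_21d_21f_0_None_40_3_20_sigmoid.json"
r = re.compile(
    r"""/(\d+)last_(\d+)s_(\d+)d_(\d+)f_(\d+)_(\S+)_(\d+)_(\d+)_(\d+)_(\w+).json""")
m = r.search(fname)
print m.groups()
# -> ('60', '0', '21', '21', '0', 'None', '40', '3', '20', 'sigmoid')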
Ejemplo n.º 23
0
 def startElement(self, name, attrs):
     if name == 'managedObject':
         object = attrs.get('class', None)
         self.__managedObjectList.append(object)
         self.__currentObject = object
         self.__currentChildObjectList = [
         ]  # new object, reset child object list
         self.__paramNameDictionary[self.__currentObject] = [
         ]  # no params yet
     if name == 'childManagedObject':
         child = attrs.get('class', None)
         self.__currentChildObjectList.append(child)
     if name == 'p':
         self.__currentParamLevel += 1
         name = attrs.get('name', None)
         fullName = attrs.get('fullName', None)
         maxOccurs = attrs.get('maxOccurs', None)
         status = attrs.get('status', None)
         hidden = attrs.get('hidden', None)
         # skip parameter, if status is 'deleted' or
         # it is excluded (e.g. change origins)
         if status == 'deleted' or hidden == 'true':
             if self.__skipParam == False:
                 self.__skipParam = True
                 self.__skipLevel = self.__currentParamLevel
         # ignore inner elements too
         if self.__skipParam:
             return
         paramInstance = Parameter.Parameter(
         )  # instantiate Parameter object to store the data
         paramInstance.setName(name)
         paramInstance.setFullName(fullName)
         paramInstance.setMaxOccurs(maxOccurs)
         paramInstance.setStatus(status)
         paramInstance.setManagedObject(self.__currentObject)
         # initialize children list to empty
         self.__currentChildDictionary[self.__currentParamLevel] = []
         # child parameters may be absent, but we can't know yet whether any exist
         if self.__currentParamLevel > 0:
             self.__currentChildDictionary[self.__currentParamLevel -
                                           1].append(name)
         self.__currentParamStackList.append(name)
         # RAML2.1: no nested lists anymore
         if len(self.__currentParamStackList) > 2:
             print ERROR_MSG
             print "multi-level parameter structure definition in PDDB, not allowed in RAML2.1!"
             print "managedObject:", self.__currentObject
             print "param stack:", self.__currentParamStackList
             sys.exit(1)
         parentParam = self.__getParenParam()
         paramInstance.setParentParam(parentParam)
         self.__storeParamInstance(paramInstance, name)
     # no need to check parameter value fields
     if self.__skipParam:
         return
     if name == 'creation':
         creationPriority = attrs.get('priority', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setCreationPriority(
             creationPriority
         )  # reference, so object in dictionary is updated
     if name == 'modification':
         modificationType = attrs.get('type', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setModificationType(modificationType)
     if name == 'feature':
         features = attrs.get('name', None)
         feaType = attrs.get('type', None)
         # PDDB definitions not very consistent
         if feaType == 'standard' and features.find('optional') != -1:
             feaType = 'optional'
             # print "fix feaType:", features, feaType
         # ignore standard features
         # print "features:", features, "-", feaType
         if features != None and feaType.find('standard') == -1:
             paramInstance = self.__getParamInstance()
             paramInstance.setFeatures(features)
     # one-way parameters should not be present in download plan
     if name == 'interface':
         source = attrs.get('source', None)
         target = attrs.get('target', None)
         bidirection = attrs.get('bidirectional', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setInterface()
         if (source == 'RAC' and target == 'RNC') or (source == 'RNC' and target == 'RAC') or \
              (source == 'RAC' and target == 'IADA') or (source == 'IADA' and target == 'RAC'):
             if bidirection == 'no':
                 paramInstance.setPlanDirection('uni')  # unidirectional
                 if (source == 'RAC'
                         and target == 'RNC') or (source == 'RAC'
                                                  and target == 'IADA'):
                     paramInstance.setInterfaceDirection('uni-down')
                 else:
                     paramInstance.setInterfaceDirection('uni-up')
             else:
                  paramInstance.setPlanDirection('bi')  # bidirectional
                 paramInstance.setInterfaceDirection('bi')
         if (source == 'EM' and target == 'RNC') or (source == 'RNC'
                                                     and target == 'EM'):
             if bidirection == 'no':
                 paramInstance.setGuiDirection('uni')  # unidirectional
             else:
                  paramInstance.setGuiDirection('bi')  # bidirectional
     if name == 'property':
         location = attrs.get('name', None)
         if location == 'Location in GUI':
             value = attrs.get('value', None)
             paramInstance = self.__getParamInstance()
             paramInstance.setLocationInGui(value)
     if name == 'simpleType':
         valueBase = attrs.get('base', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setParamType(name)
         paramInstance.setValueBase(valueBase)
     if name == 'complexType':
         paramInstance = self.__getParamInstance()
         paramInstance.setParamType(name)
     if name == 'editing':
         for attrName in attrs.keys():
             if attrName == 'divisor':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setDivisor(attrs.get(attrName))
             if attrName == 'shift':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setShift(attrs.get(attrName))
             if attrName == 'multiplicand':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setMultiplicand(attrs.get(attrName))
             if attrName == 'internalValue':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setInternalValue(attrs.get(attrName))
     if name == 'range':
         for attrName in attrs.keys():
             if attrName == 'minIncl':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setMinValue(attrs.get(attrName))
             if attrName == 'maxIncl':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setMaxValue(attrs.get(attrName))
             if attrName == 'step':
                 paramInstance = self.__getParamInstance()
                 paramInstance.setStep(attrs.get(attrName))
     if name == 'enumeration':
         enum = attrs.get('value', None)
         text = attrs.get('text', "")
         paramInstance = self.__getParamInstance()
         paramInstance.addEnumerationValue(enum)
         paramInstance.addEnumerationText(enum, text)
     if name == 'bit':
         default = attrs.get('default', None)
         fixed = attrs.get('fixed', False)
         paramInstance = self.__getParamInstance()
         paramInstance.addBitValue(default, fixed)
     if name == 'default':  # default value inside 'bit' element
         defaultValue = attrs.get('value', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setDefaultValue(defaultValue)
     if name == 'special':
         special = attrs.get('value', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setSpecialValue(special)
     if name == 'minLength':
         min = attrs.get('value', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setMinValue(min)
     if name == 'maxLength':
         max = attrs.get('value', None)
         paramInstance = self.__getParamInstance()
         paramInstance.setMaxValue(max)
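# A minimal, self-contained sketch of how a SAX handler like the one above is
# driven; the DemoHandler class and the XML literal are illustrative
# assumptions, not from the original project.
import xml.sax
from StringIO import StringIO


class DemoHandler(xml.sax.ContentHandler):
    def startElement(self, name, attrs):
        # react to the same element/attribute shapes the handler above inspects
        if name == 'p':
            print "parameter:", attrs.get('name', None)


xml.sax.parse(
    StringIO('<managedObject class="RNC"><p name="maxBitRate"/></managedObject>'),
    DemoHandler())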
Ejemplo n.º 24
0
##
## This module has parameter definitions for all
## parameters used in this code.
##
## Change here for bounds, or import and rewrite.
##
##

from Parameter import *

## Parameters are value, variation, bounds
Om_par = Parameter("Om", 0.3038, 0.1, (0.05, 1.5), "\Omega_m")
Obh2_par = Parameter("Obh2", 0.02234, 0.0002, (0.02, 0.025), "\Omega_{b}h^2")
h_par = Parameter("h", 0.6821, 0.05, (0.4, 1.0), "h")
mnu_par = Parameter("mnu", 0.06, 0.1, (0, 1.0), "\Sigma m_{\\nu}")
Nnu_par = Parameter("Nnu", 3.046, 0.5, (3.046, 5.046), "N_{\\rm eff}")

Ok_par = Parameter("Ok", 0.0, 0.1, (-1.5, 1.5), "\Omega_k")
w_par = Parameter("w", -1.0, 0.1, (-2.0, 0.0), "w_0")
wa_par = Parameter("wa", 0.0, 0.1, (-2.0, 2.0), "w_a")

## this is the prefactor parameter c/rdH0
Pr_par = Parameter("Pr", 28.6, 4, (5, 70), "c/(H_0r_d)")

## Poly Cosmology Parameters
Om1_par = Parameter("Om1", 0.0, 0.1, (-3, 3), "\Omega_1")
Om2_par = Parameter("Om2", 0.0, 0.1, (-3, 3), "\Omega_2")

## JordiCDM Cosmology Parameters
q_par = Parameter("q", 0.0, 0.2, (0, 1), "q")
za_par = Parameter("za", 3, 1.0, (2, 10), "z_a")
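## A sketch following the same (name, value, variation, bounds, LaTeX-label)
## layout as above; s8_par is a made-up illustration, not part of the project.
s8_par = Parameter("s8", 0.8, 0.05, (0.5, 1.2), "\sigma_8")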
Ejemplo n.º 25
0
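# Fragment note: the tag/attribute-name constants used below (classTag,
# className, methodTag, methodName, methodFlow, paramTag) and the imports
# (ET, presumably xml.etree.ElementTree, plus the N, M, Parameter and decode
# modules) are defined earlier in the original file and elided here.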
paramType = "prtype"
paramName = "prname"

tree = ET.ElementTree(file='protocol.xml')
root = tree.getroot()

# read all classes
cVector = []
for elem in root:
    if elem.tag == classTag:
        tClass = N.ClassNode(elem.attrib[className])
        for methods in elem:
            if methods.tag == methodTag:
                tMethods = M.Method(methods.attrib[methodName],
                                    methods.attrib[methodFlow])
                for params in methods:
                    if params.tag == paramTag:
                        tParam = Parameter.Parameter(params.attrib[paramName],
                                                     params.attrib[paramType])
                        tMethods.appendPara(tParam)
                tClass.addMethod(tMethods)
        cVector.append(tClass)

print(cVector)
for t in cVector:
    t.display()

for t in cVector:
    dec = decode.Decode(t)
    dec.decode()
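# A guess at the XML shape the loop above walks (all tag and attribute names
# other than "prname"/"prtype" are assumptions, since the *Tag constants are
# elided from this listing):
#
# <protocol>
#   <class clname="Session">
#     <method mename="open" meflow="in">
#       <param prname="timeout" prtype="int"/>
#     </method>
#   </class>
# </protocol>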
Ejemplo n.º 26
0
    def main(self, is_all):
        """
        Run AD main function
        :param is_all:
        :return:
        """

        Job_id_list = []
        Job_done = 0
        Job_total_count = {}

        parameter_function = Parameter.parameter(Global.get_comm_week(), 0)

        if str("all") == str(is_all):
            run_country = parameter_function.AllCountry()
            return_code = self.Exce(Global.get_master_1_url(), run_country)

            run_ad_object = json.loads(return_code)

            Job_id_list = [run_ad_object["data"]["jobExecutationId"]]
            Job_done = 1
            Job_total_count[run_ad_object["data"][
                "jobExecutationId"]] = run_ad_object["data"]["totalAdCount"]

            logger.info(run_ad_object)
        else:
            # JPN runs against master 1; CHN, USA and the remaining countries
            # run against master 2.
            run_plan = [
                (parameter_function.JPN, Global.get_master_1_url()),
                (parameter_function.CHN, Global.get_master_2_url()),
                (parameter_function.USA, Global.get_master_2_url()),
                (parameter_function.Other, Global.get_master_2_url()),
            ]
            for build_country, master_url in run_plan:
                run_country = build_country()
                return_code = self.Exce(master_url, run_country)

                run_ad_object = json.loads(return_code)

                job_id = run_ad_object["data"]["jobExecutationId"]
                Job_id_list.append(job_id)
                Job_total_count[job_id] = run_ad_object["data"]["totalAdCount"]

                logger.info(run_ad_object)

            Job_done = 4

        Global.set_job_id_list(Job_id_list)
        Global.set_job_done(Job_done)
        Global.set_job_total_count(Job_total_count)
Ejemplo n.º 27
0
##
## This module has parameter definitions for all
## parameters used in this code.
##
## Change here for bounds, or import and rewrite.
##
##

from Parameter import *

## Parameters are value, variation, bounds
#0.3038, 0.02234, 0.6821
Om_par = Parameter("Om", 0.3038, 0.05, (0.05, 1.5), "\Omega_m*")
Obh2_par = Parameter("Obh2", 0.02234, 0.0002, (0.02, 0.025), "\Omega_bh^2")
h_par = Parameter("h", 0.6821, 0.05, (0.4, 1.0), "h")
mnu_par = Parameter("mnu", 0.06, 0.1, (0, 1.0), "\Sigma m_{\\nu}")
Nnu_par = Parameter("Nnu", 3.046, 0.5, (3.046, 5.046), "N_{\\rm eff}")

Ok_par = Parameter("Ok", 0.0, 0.1, (-1.5, 1.5), "\Omega_k")
w_par = Parameter("w", 1.0, 0.1, (-0.5, 2.0), "w_0")
wa_par = Parameter("wa", 1.0, 0.1, (-0.5, 2.0), "w_a")
wb_par = Parameter("wb", 0.7, 0.2, (-2., 3.0), "w_b")
wc_par = Parameter("wc", 0.7, 0.2, (-3., 5.0), "w_c")

## this is the prefactor parameter c/rdH0
Pr_par = Parameter("Pr", 28.6, 4, (5, 70), "c/(H_0r_d)")

## Poly Cosmology Parameters
Om1_par = Parameter("Om1", 0.0, 0.1, (-3, 3), "\Omega_1")
Om2_par = Parameter("Om2", 0.0, 0.1, (-3, 3), "\Omega_2")
Ejemplo n.º 28
0
def initialize():
    pm = Parameter.Parameter(
    )  #initialize the parameters (in Parameter.py)
    us = units.units(pm)  #then initialize the units as well
    return (pm, us)
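# Usage sketch: the returned tuple unpacks into the parameter and unit objects,
#     pm, us = initialize()
# (assumes the Parameter and units modules imported by the original file).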
Ejemplo n.º 29
0
Archivo: test.py Proyecto: Mu-L/hikyuu
import TradeCost

import Environment
import Condition
import MoneyManager
import Signal
import Stoploss
import ProfitGoal
import Slippage
import AllocateFunds

if __name__ == "__main__":

    suite = unittest.TestSuite()
    suite.addTest(Datetime.suite())
    suite.addTest(Parameter.suite())

    suite.addTest(MarketInfo.suite())
    suite.addTest(StockTypeInfo.suite())
    suite.addTest(Stock.suite())
    suite.addTest(KData.suite())
    suite.addTest(Indicator.suite())
    suite.addTest(TradeCost.suite())

    suite.addTest(Environment.suite())
    suite.addTest(Environment.suiteTestCrtEV())
    suite.addTest(Condition.suite())
    suite.addTest(Condition.suiteTestCrtCN())
    suite.addTest(MoneyManager.suite())
    suite.addTest(MoneyManager.suiteTestCrtMM())
    suite.addTest(Signal.suite())
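    # The listing is truncated here; a suite assembled this way is normally
    # handed to a runner, e.g.
    #     unittest.TextTestRunner(verbosity=2).run(suite)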
Ejemplo n.º 30
0
def predictAllShop_ANN2_HPS(all_data,
                            trainAsTest=False,
                            saveFilePath=None,
                            featurePath=None,
                            cate_level=0,
                            cate_name=None,
                            featureSavePath=None,
                            needSaveFeature=False,
                            ignore_shopids=[],
                            needCV=False,
                            model_path=None):
    """
    通过gridsearch找超参数
    :param trainAsTest: 是否使用训练集后14天作为测试集
    :param model: 某个模型
    :param saveFilePath
    :param featurePath:
    :param cate_level:
    :param cate_name:
    :param featureSavePath:
    :param needSaveFeature:
    :param ignore_shopids:
    :param create_model_function:
    :param needCV
    :return:
    """

    verbose = 2
    last_N_days = 70
    #number of shops that have been skipped
    ignores = 0
    shopids = None
    shop_need_to_predict = 2000
    if (cate_level == 0):
        shopids = np.arange(1, 1 + shop_need_to_predict, 1)
    else:
        shopids = Parameter.extractShopValueByCate(cate_level, cate_name)
    shop_info = pd.read_csv(Parameter.shopinfopath,
                            names=[
                                "shopid", "cityname", "locationid", "perpay",
                                "score", "comment", "level", "cate1", "cate2",
                                "cate3"
                            ])

    weather = True
    weekOrWeekend = True
    day_back_num = 21
    sameday_backNum = 7
    week_backnum = 3
    other_features = [statistic_functon_mean, statistic_functon_median]
    other_features = []
    shop_features = ["perpay", "comment", "score", "level"]
    shop_features = []
    #one-hot encoder for the is-weekend flag
    hot_encoder = onehot([[1], [0]])
    #one-hot encoder for category level 1 (cate1)
    cate1_list = np.unique(shop_info['cate1'])
    cate1_label_encoder = labelEncoder(cate1_list)
    cate1_list2 = cate1_label_encoder.transform(cate1_list).reshape((-1, 1))
    cate1_hot_encoder = onehot(cate1_list2)

    if featurePath is None:
        all_x = None
        all_y = None
        for shopid in shopids:
            if shopid in ignore_shopids:
                print "ignore get train", shopid
                ignores += 1
                continue
            print "get ", shopid, " train"
            part_data = all_data[all_data.shopid == shopid]
            last_14_real_y = None
            # carve out part of the data as the training set
            if trainAsTest:  #when the last 14 days act as the test set, train on the earlier part
                last_14_real_y = part_data[len(part_data) -
                                           14:]["count"].values
                part_data = part_data[0:len(part_data) - 14]
            # print last_14_real_y
            '''decide how many leading days of data to skip'''
            skipNum = part_data.shape[0] - last_N_days
            if skipNum < 0:
                skipNum = 0
            train_x = None
            '''extract features'''
            if sameday_backNum != 0:  #sameday
                sameday = extractBackSameday(part_data, sameday_backNum,
                                             skipNum, nan_method_sameday_mean)
                train_x = getOneWeekdayFomExtractedData(sameday)
            if day_back_num != 0:  #day
                if train_x is not None:
                    train_x = np.concatenate(
                        (train_x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num, skipNum,
                                            nan_method_sameday_mean))),
                        axis=1)
                else:
                    train_x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num, skipNum,
                                       nan_method_sameday_mean))
            if weekOrWeekend:  #weekOrWeekend
                ws = getOneWeekdayFomExtractedData(
                    extractWorkOrWeekend(part_data, skipNum))
                train_x = np.concatenate((train_x, hot_encoder.transform(ws)),
                                         axis=1)

            count = extractCount(part_data, skipNum)
            train_y = getOneWeekdayFomExtractedData(count)
            for feature in other_features:
                value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum, skipNum,
                                         nan_method_sameday_mean, feature))
                train_x = np.append(train_x, value, axis=1)
            '''append shop information features'''
            # print train_x,train_x.shape
            index = shopid - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo['cityname']
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            shop_cate1 = oneshopinfo['cate1']
            import warnings
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=DeprecationWarning)
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]))
            if "perpay" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_perpay,
                                    axis=1)
            if "score" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_score,
                                    axis=1)
            if "comment" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_comment,
                                    axis=1)
            if "level" in shop_features:
                train_x = np.insert(train_x,
                                    train_x.shape[1],
                                    shop_level,
                                    axis=1)
            if "cate1" in shop_features:
                for i in range(shop_cate1_encoder.shape[1]):
                    train_x = np.insert(train_x,
                                        train_x.shape[1],
                                        shop_cate1_encoder[0][i],
                                        axis=1)
            '''shop information features added'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, skipNum, shop_city))
                train_x = np.append(train_x, weathers, axis=1)
            '''end of weather features'''

            if all_x is None:
                all_x = train_x
                all_y = train_y
            else:
                all_x = np.insert(all_x, all_x.shape[0], train_x, axis=0)
                all_y = np.insert(all_y, all_y.shape[0], train_y, axis=0)

                # '''append weekday feature'''
                # extract_weekday = getOneWeekdayFomExtractedData(extractWeekday(part_data, skipNum))
                # train_x = np.append(train_x, extract_weekday, axis=1)
                # ''''''

                # train_x = train_x.reshape((train_x.shape[0],
                #                            train_x.shape[1], 1))
                # print model.get_weights()
                # part_counts = []
                # for i in range(7):
                #     weekday = i + 1
                #     part_count = getOneWeekdayFomExtractedData(count, weekday)
                #     part_counts.append(part_count)

        train_x = all_x
        train_y = all_y

        if needSaveFeature:
            featureAndLabel = np.concatenate((train_x, train_y), axis=1)
            flDF = pd.DataFrame(featureAndLabel)
            if featureSavePath is None:
                if trainAsTest:
                    featureSavePath = Parameter.projectPath + "lzj/train_feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast.csv" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
                else:
                    featureSavePath = Parameter.projectPath + "lzj/feature/%dCatelevel_%sCatename_%dfeatures_%dSameday_%dDay_%dLast.csv" % (
                        cate_level, cate_name, flDF.shape[1] - 1,
                        sameday_backNum, day_back_num, last_N_days)
            flDF.to_csv(featureSavePath)
    else:  #a featurePath file was supplied
        if trainAsTest:
            path = Parameter.projectPath + "lzj/train_feature/" + featurePath
        else:
            path = Parameter.projectPath + "lzj/feature/" + featurePath
        flDF = pd.read_csv(path, index_col=0)
        train_x = flDF.values[:, :-1]
        train_y = flDF.values[:, -1:]
        # print train_x
        # print train_y
    '''normalize features and labels'''
    x_scaler = MinMaxScaler().fit(train_x)
    y_scaler = MinMaxScaler().fit(train_y)
    train_x = x_scaler.transform(train_x)
    train_y = y_scaler.transform(train_y)
    '''end of normalization'''

    if model_path is None:
        if needCV:
            '''gridsearchCV'''
            # nb_epoch=rnn_epoch, batch_size=batch_size, verbose=verbose
            # input_dim, h1_unit = 16, optimizer = "adagrad", init = "normal"):
            input_dim = [train_x.shape[1]]
            h1_activation = ["relu"]
            h1_unit = [8, 12, 16, 20]
            h2_unit = [2, 4, 6, 8, 10]
            model = KerasRegressor(build_fn=create_model2, verbose=verbose)
            batch_size = [3, 5, 7, 10]
            epochs = [10, 15, 20, 25, 30, 40]
            param_grid = dict(batch_size=batch_size,
                              nb_epoch=epochs,
                              h1_unit=h1_unit,
                              h2_unit=h2_unit,
                              input_dim=input_dim)
            grid = GridSearchCV(estimator=model,
                                param_grid=param_grid,
                                n_jobs=-1,
                                scoring="neg_mean_squared_error")
            grid.refit = False
            grid_result = grid.fit(train_x, train_y)

            print("Best: %f using %s" %
                  (grid_result.best_score_, grid_result.best_params_))
            for params, mean_score, scores in grid_result.grid_scores_:
                print("%f (%f) with: %r" %
                      (scores.mean(), scores.std(), params))
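            # note: grid_scores_ is the pre-0.20 scikit-learn API; newer
            # versions expose grid.cv_results_ instead.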

        if not needCV:
            input_dim = train_x.shape[1]
            h1_activation = "relu"
            h1_unit = 12
            h2_unit = 8
            batch_size = 3
            epochs = 80

        else:
            input_dim = train_x.shape[1]
            epochs = grid_result.best_params_['nb_epoch']
            batch_size = grid_result.best_params_['batch_size']
            h1_unit = grid_result.best_params_["h1_unit"]
            h2_unit = grid_result.best_params_["h2_unit"]
            h1_activation = "relu"

        early_stopping = EarlyStopping(monitor='val_loss', patience=2)
        best_model = create_model2(input_dim=input_dim,
                                   h1_unit=h1_unit,
                                   h2_unit=h2_unit)
        hist = best_model.fit(train_x,
                              train_y,
                              verbose=verbose,
                              batch_size=batch_size,
                              nb_epoch=epochs,
                              validation_split=0.1,
                              callbacks=[early_stopping])
        print hist.history

        #save the model
        if trainAsTest:
            model_save_path = Parameter.projectPath+"lzj/train_model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 , epochs, batch_size, h1_unit, h1_activation,h2_unit,h1_activation)
        else:
            model_save_path = Parameter.projectPath+"lzj/model/" + \
                              "%dlast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%d_%s.json" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 ,  epochs, batch_size, h1_unit, h1_activation,h2_unit,h1_activation)
        print "model save in :", model_save_path
        saveModel(model_save_path, best_model)
    else:  #model_path is not None
        best_model = getModel(model_path)

    format = "%Y-%m-%d"
    if trainAsTest:
        startTime = datetime.datetime.strptime("2016-10-18", format)
    else:
        startTime = datetime.datetime.strptime("2016-11-1", format)
    timedelta = datetime.timedelta(1)
    '''predict for all shops'''
    model = best_model
    preficts_all = None
    real_all = None
    for j in shopids:
        if j in ignore_shopids:
            print "ignore predict", j
            continue
        print "predict:", j
        preficts = []
        part_data = all_data[all_data.shopid == j]
        last_14_real_y = None

        if trainAsTest:  #when the last 14 days act as the test set, train on the earlier part
            last_14_real_y = part_data[len(part_data) - 14:]["count"].values
            part_data = part_data[0:len(part_data) - 14]
        '''predict the next 14 days'''
        for i in range(14):
            currentTime = startTime + timedelta * i
            strftime = currentTime.strftime(format)
            # index = getWeekday(strftime) - 1
            # part_count = part_counts[index]
            #use the same weekday's values from the previous {sameday_backNum} weeks as features
            part_data = part_data.append(
                {
                    "count": 0,
                    "shopid": j,
                    "time": strftime,
                    "weekday": getWeekday(strftime)
                },
                ignore_index=True)
            x = None
            if sameday_backNum != 0:
                x = getOneWeekdayFomExtractedData(
                    extractBackSameday(part_data, sameday_backNum,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
            if day_back_num != 0:
                if x is None:
                    x = getOneWeekdayFomExtractedData(
                        extractBackDay(part_data, day_back_num,
                                       part_data.shape[0] - 1,
                                       nan_method_sameday_mean))
                else:
                    x = np.concatenate(
                        (x,
                         getOneWeekdayFomExtractedData(
                             extractBackDay(part_data, day_back_num,
                                            part_data.shape[0] - 1,
                                            nan_method_sameday_mean))),
                        axis=1)
            if weekOrWeekend:
                x = np.concatenate(
                    (x,
                     hot_encoder.transform(
                         getOneWeekdayFomExtractedData(
                             extractWorkOrWeekend(part_data,
                                                  part_data.shape[0] - 1)))),
                    axis=1)

            for feature in other_features:
                x_value = getOneWeekdayFomExtractedData(
                    extractBackWeekValue(part_data, week_backnum,
                                         part_data.shape[0] - 1,
                                         nan_method_sameday_mean, feature))
                x = np.append(x, x_value, axis=1)
            # '''append weekday feature'''
            # x = np.append(x, getOneWeekdayFomExtractedData(extractWeekday(part_data, part_data.shape[0]-1)), axis=1)
            # ''''''
            '''append shop information features'''
            index = j - 1
            oneshopinfo = shop_info.ix[index]
            shop_city = oneshopinfo["cityname"]
            shop_perpay = oneshopinfo['perpay'] if not pd.isnull(
                oneshopinfo['perpay']) else 0
            shop_score = oneshopinfo['score'] if not pd.isnull(
                oneshopinfo['score']) else 0
            shop_comment = oneshopinfo['comment'] if not pd.isnull(
                oneshopinfo['comment']) else 0
            shop_level = oneshopinfo['level'] if not pd.isnull(
                oneshopinfo['level']) else 0
            if "perpay" in shop_features:
                x = np.insert(x, x.shape[1], shop_perpay, axis=1)
            if "score" in shop_features:
                x = np.insert(x, x.shape[1], shop_score, axis=1)
            if "comment" in shop_features:
                x = np.insert(x, x.shape[1], shop_comment, axis=1)
            if "level" in shop_features:
                x = np.insert(x, x.shape[1], shop_level, axis=1)
            shop_cate1 = oneshopinfo['cate1']
            if "cate1" in shop_features:
                shop_cate1_encoder = cate1_hot_encoder.transform(
                    cate1_label_encoder.transform([shop_cate1]).reshape(
                        (-1, 1)))
                for i in range(shop_cate1_encoder.shape[1]):
                    x = np.insert(x,
                                  x.shape[1],
                                  shop_cate1_encoder[0][i],
                                  axis=1)
            '''shop information features added'''
            '''weather features'''
            if weather:
                weathers = getOneWeekdayFomExtractedData(
                    extractWeatherInfo(part_data, part_data.shape[0] - 1,
                                       shop_city))
                x = np.append(x, weathers, axis=1)
            x = x_scaler.transform(x)
            '''end of weather features'''
            # for j in range(sameday_backNum):
            #     x.append(train_y[len(train_y) - (j+1)*7][0])
            # x = np.array(x).reshape((1, sameday_backNum))

            # print x
            # x = x.reshape(1, sameday_backNum, 1)
            predict = model.predict(x)
            if predict.ndim == 2:
                predict = y_scaler.inverse_transform(predict)[0][0]
            elif predict.ndim == 1:
                predict = y_scaler.inverse_transform(predict)[0]
            # print predict
            if (predict <= 0):
                predict = 1  # floor non-positive predictions at 1
            preficts.append(predict)
            part_data.set_value(part_data.shape[0] - 1, "count", predict)

        preficts = (removeNegetive(toInt(np.array(preficts)))).astype(int)
        if preficts_all is None:
            preficts_all = preficts
        else:
            preficts_all = np.insert(preficts_all,
                                     preficts_all.shape[0],
                                     preficts,
                                     axis=0)

        if trainAsTest:
            last_14_real_y = (removeNegetive(toInt(
                np.array(last_14_real_y)))).astype(int)
            if real_all is None:
                real_all = last_14_real_y
            else:
                real_all = np.insert(real_all,
                                     real_all.shape[0],
                                     last_14_real_y,
                                     axis=0)
                # print preficts,last_14_real_y
            print str(j) + ',score:', scoreoneshop(preficts, last_14_real_y)

    # preficts = np.array(preficts)
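    # One row of 14 daily predictions per (non-ignored) shop.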
    preficts_all = preficts_all.reshape((len(shopids) - ignores, 14))
    if trainAsTest:
        real_all = real_all.reshape((len(shopids) - ignores, 14))
        preficts_all = np.concatenate((preficts_all, real_all), axis=1)
    shopids = shopids.tolist()
    for remove in ignore_shopids:
        try:
            shopids.remove(remove)
        except ValueError:
            # shop id was not in the list; nothing to remove
            pass
    preficts_all = np.insert(preficts_all, 0, shopids, axis=1)
    if saveFilePath is not None:
        path = saveFilePath + "%dLast_%ds_%dd_%df_%d_%s_%d_%d_%d_%s_%d_%s_%dshops" \
                              % (last_N_days,sameday_backNum, day_back_num, train_x.shape[1], cate_level, cate_name
                                 ,  epochs, batch_size, h1_unit, h1_activation,h2_unit,h1_activation,len(shopids)-ignores)
        if trainAsTest:
            path = path + "_train.csv"
        else:
            path = path + ".csv"

        print "save in :", path
        np.savetxt(path, preficts_all, fmt="%d", delimiter=",")
    return preficts_all
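
Note: the cate1_label_encoder / cate1_hot_encoder objects referenced above are
not defined in this snippet; they are presumably scikit-learn encoders fit once
over all cate1 values. A minimal sketch under that assumption:

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Assumed setup (names follow the snippet, but this is a sketch): fit both
# encoders on every cate1 value in shop_info so transform() can one-hot
# encode a single shop's category at predict time.
cate1_label_encoder = LabelEncoder()
cate1_labels = cate1_label_encoder.fit_transform(shop_info["cate1"].values)
cate1_hot_encoder = OneHotEncoder(sparse=False)  # dense output, as indexed above
cate1_hot_encoder.fit(cate1_labels.reshape((-1, 1)))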
Ejemplo n.º 31
0
        i += 1
    finalresults = ("%s %s %d finalresult.csv" %
                    (strategyName, domain_symbol, K_MIN))
    resultlist.to_csv(finalresults)
    return resultlist


if __name__ == '__main__':
    # ==================== Parameter and folder setup ====================
    indexcols = Parameter.ResultIndexDic
    strategyParameterSet = {}
    if not Parameter.multi_symbol_bt_swtich:
        strategy_bt_parameter = []
        # Strategy parameter settings
        strategy_name = Parameter.strategy_name
        strategy_para_name_list = Parameter.get_strategy_para_name_list(
            strategy_name)
        # Single-symbol, single-period mode
        default_para_dic = Parameter.strategy_para_dic[strategy_name]
        paradic = {
            'strategy_name': strategy_name,
            'exchange_id': Parameter.exchange_id,
            'sec_id': Parameter.sec_id,
            'K_MIN': Parameter.K_MIN,
            'startdate': Parameter.startdate,
            'enddate': Parameter.enddate,
            'result_para_dic': Parameter.result_para_dic,
            'new_para': default_para_dic['new_para']
        }
        if default_para_dic['new_para']:
            # In new-parameter mode, load the default parameters
            for para_name in strategy_para_name_list:
Ejemplo n.º 32
0
def Main(contigfile_, tuple_of_bamfiles, tuple_of_means, tuple_of_thresholds,
         edge_support, read_len, cont_threshold, ratio, output_dest, std_dev,
         covcutoff, haplratio, haplthreshold, detect_haplotype,
         detect_duplicate, gff_file, fosmidpool, mapquality):

    from time import time
    from copy import deepcopy
    import os  # used below; may also already be imported at module level
    import CreateGraph as CG
    import MakeScaffolds as MS
    import GenerateOutput as GO
    import Parameter
    tot_start = time()
    # F: list of (ordered) lists of tuples (contig_name, direction, position,
    # length, links). Each tuple is a contig within a scaffold, and each list
    # of tuples is one scaffold.
    F = []
    # Scaffold dict with contig objects for easy fetching of all contigs in a scaffold.
    Scaffolds = {}
    n = len(tuple_of_bamfiles)  # number of libraries we have
    # Object containing all parameters (user specified, defaulted and computed along the way).
    param = Parameter.parameter()
    param.scaffold_indexer = 1  # global indicator for scaffolds, used to index scaffolds when they are created
    param.map_quality = mapquality
    param.rel_weight = ratio
    Contigs = {}  # contig dict that stores contig objects

    if not os.path.exists(output_dest):
        os.makedirs(output_dest)
    param.information_file = open(os.path.join(output_dest, 'Statistics.txt'),
                                  'w')

    Information = param.information_file
    open(os.path.join(output_dest, 'haplotypes.fa'), 'w')  # create/truncate the haplotypes output file
    #Read in the sequences of the contigs in memory
    contigfile = open(contigfile_, 'r')
    C_dict = ReadInContigseqs(contigfile)
    #C_dict = {}
    param.gff_file = gff_file
    #iterate over libraries
    param.first_lib = True
    for i in range(0, n):
        start = time()
        param.bamfile = tuple_of_bamfiles[i]
        param.mean_ins_size = tuple_of_means[i]
        param.ins_size_threshold = tuple_of_thresholds[i]
        param.edgesupport = edge_support[i]
        param.read_len = read_len[i]
        param.output_directory = output_dest
        param.std_dev_ins_size = std_dev[i]
        param.contig_threshold = cont_threshold[i]
        param.cov_cutoff = covcutoff[i]
        param.hapl_ratio = haplratio
        param.hapl_threshold = haplthreshold
        param.detect_haplotype = detect_haplotype
        param.detect_duplicate = detect_duplicate
        param.fosmidpool = fosmidpool
        print >> Information, '\nPASS ' + str(i + 1) + '\n\n'
        print 'Starting scaffolding with library: ', param.bamfile
        (G, Contigs, Scaffolds, F, param) = CG.PE(
            Contigs, Scaffolds, F, Information, output_dest, C_dict, param
        )  #Create graph, single out too short contigs/scaffolds and store them in F
        param.first_lib = False  #not the first lib any more
        if G is None:
            print '0 contigs/super-contigs passed the length criteria of this step. Exiting and printing results.. '
            break
        elapsed = time() - start
        print >> Information, 'Time elapsed for creating graph, iteration ' + str(
            i) + ': ' + str(elapsed) + '\n'
        start = time()
        (Contigs, Scaffolds, F, param) = MS.Algorithm(
            G, Contigs, Scaffolds, F, Information, C_dict, param
        )  # Make scaffolds, store the complex areas (consisting of contig/scaffold) in F, store the created scaffolds in Scaffolds, update Contigs
        elapsed = time() - start
        print >> Information, 'Time elapsed for making scaffolds, iteration ' + str(
            i) + ': ' + str(elapsed) + '\n'

        print 'Writing out scaffolding results for step', i + 1, ' ...'
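        # Work on deep copies so the intermediate output can be written
        # (WriteToF consumes scaffolds) without mutating the live
        # Scaffolds/Contigs/F structures needed for the next library pass.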
        Scaffolds_copy = deepcopy(Scaffolds)
        Contigs_copy = deepcopy(Contigs)
        F_copy = deepcopy(F)
        # Iterate over the dict's keys so entries can be removed while iterating.
        for scaffold_ in Scaffolds_copy.keys():
            ###  Go to function and print to F
            ### Remove Scaf_obj from Scaffolds and Contig_obj from contigs
            S_obj = Scaffolds_copy[scaffold_]
            list_of_contigs = S_obj.contigs  #list of contig objects contained in scaffold object
            Contigs_copy, F_copy = GO.WriteToF(F_copy, Contigs_copy,
                                               list_of_contigs)
            del Scaffolds_copy[scaffold_]
        #print F
        GO.PrintOutput(F_copy, C_dict, Information, output_dest, param, i + 1)

    ### Calculate stats for last scaffolding step
    scaf_lengths = [
        Scaffolds[scaffold_].s_length for scaffold_ in Scaffolds.keys()
    ]
    sorted_lengths = sorted(scaf_lengths, reverse=True)
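    # N50: length of the shortest scaffold in the minimal set of longest
    # scaffolds covering at least half the total assembly length;
    # L50: the number of scaffolds in that set.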
    N50, L50 = CG.CalculateStats(sorted_lengths, param)
    param.current_L50 = L50
    param.current_N50 = N50
    #    ### Call a print scaffolds function here for remaining scaffolds that has "passed" all library levels
    #    for scaffold_ in Scaffolds.keys(): #iterate over keys in hash, so that we can remove keys while iterating over it
    #        ###  Go to function and print to F
    #        ### Remove Scaf_obj from Scaffolds and Contig_obj from contigs
    #        S_obj=Scaffolds[scaffold_]
    #        list_of_contigs=S_obj.contigs   #list of contig objects contained in scaffold object
    #        Contigs, F = GO.WriteToF(F,Contigs,list_of_contigs)
    #        del Scaffolds[scaffold_]
    #    #print F
    #    GO.PrintOutput(F,C_dict,Information,output_dest)

    elapsed = time() - tot_start
    print >> Information, 'Total time for scaffolding: ' + str(elapsed) + '\n'
    print 'Finished\n'
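
Note: ReadInContigseqs is called above but not shown in this snippet. It
presumably parses the contig FASTA file into a dict mapping contig name to
sequence; a minimal sketch under that assumption:

def ReadInContigseqs(contigfile):
    """Assumed helper: parse an open FASTA file into {contig_name: sequence}."""
    cont_dict = {}
    accession = None
    seq_parts = []
    for line in contigfile:
        line = line.strip()
        if line.startswith('>'):
            if accession is not None:
                cont_dict[accession] = ''.join(seq_parts)
            accession = line[1:].split()[0]  # contig name up to first whitespace
            seq_parts = []
        elif line:
            seq_parts.append(line)
    if accession is not None:
        cont_dict[accession] = ''.join(seq_parts)
    return cont_dict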